Data 2 : Analysis and Modeling

library(caret)
library(pROC)
library(dplyr)
library("fastDummies")
library(glmnet)
library(rpart)
library(e1071)  
library(class) 
library(rpart)
library(randomForest)
library(datasets)
library(xgboost)
library(DiagrammeR)

df_original <- read.csv('../cleaned_data/cleaning_data-02.csv')
install.packages('vscDebugger')
Warning message:
"package 'vscDebugger' is not available for this version of R

A version of this package for your version of R might be available elsewhere,
see the ideas at
https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages"
colnames(df_original)
  1. 'BEM_ID'
  2. 'P_IDCOR'
  3. 'H_IDNUMBER'
  4. 'HHNO'
  5. 'PERSNO'
  6. 'RELHEAD'
  7. 'A03'
  8. 'A07'
  9. 'A08'
  10. 'A09'
  11. 'A10A'
  12. 'A11'
  13. 'A11Y'
  14. 'A12'
  15. 'A13'
  16. 'A14'
  17. 'A15V1'
  18. 'A15V4'
  19. 'D1A_6AF'
  20. 'D1A_7MF'
  21. 'D1A_7YF'
  22. 'D1A_8F'
  23. 'D1A_9F'
  24. 'D1A_10CF'
  25. 'D1A_1V1F'
  26. 'D1A_1V3F'
  27. 'D1A_1V4F'
  28. 'D1A_1V7F'
  29. 'D1A_1V8F'
  30. 'D1A_1V9F'
  31. 'D1A_6AL'
  32. 'D1A_7ML'
  33. 'D1A_7YL'
  34. 'D1A_8L'
  35. 'D1A_9L'
  36. 'D1A_10CL'
  37. 'D1A_1V1L'
  38. 'D1A_1V3L'
  39. 'D1A_1V4L'
  40. 'D1A_1V7L'
  41. 'D1A_1V8L'
  42. 'D1A_1V9L'
  43. 'D1A_4'
  44. 'D1A_8YF'
  45. 'D1A_8YL'
  46. 'D1B_6AF'
  47. 'D1B_7MF'
  48. 'D1B_7YF'
  49. 'D1B_8F'
  50. 'D1B_9F'
  51. 'D1B_1V1F'
  52. 'D1B_1V4F'
  53. 'D1B_1V6F'
  54. 'D1B_1V7F'
  55. 'D1B_1V8F'
  56. 'D1B_1V9F'
  57. 'D1B_6AL'
  58. 'D1B_7ML'
  59. 'D1B_7YL'
  60. 'D1B_8L'
  61. 'D1B_9L'
  62. 'D1B_1V1L'
  63. 'D1B_1V4L'
  64. 'D1B_1V6L'
  65. 'D1B_1V7L'
  66. 'D1B_1V8L'
  67. 'D1B_1V9L'
  68. 'D1B_4'
  69. 'D1C_6AF'
  70. 'D1C_7MF'
  71. 'D1C_7YF'
  72. 'D1C_8F'
  73. 'D1C_9F'
  74. 'D1C_10CF'
  75. 'D1C_1V1F'
  76. 'D1C_1V3F'
  77. 'D1C_1V4F'
  78. 'D1C_1V5F'
  79. 'D1C_1V6F'
  80. 'D1C_1V7F'
  81. 'D1C_1V8F'
  82. 'D1C_1V9F'
  83. 'D1C_6AL'
  84. 'D1C_7ML'
  85. 'D1C_7YL'
  86. 'D1C_8L'
  87. 'D1C_9L'
  88. 'D1C_10CL'
  89. 'D1C_1V1L'
  90. 'D1C_1V3L'
  91. 'D1C_1V4L'
  92. 'D1C_1V5L'
  93. 'D1C_1V6L'
  94. 'D1C_1V7L'
  95. 'D1C_1V8L'
  96. 'D1C_1V9L'
  97. 'D1C_1V10L'
  98. 'D1C_4'
  99. 'N1_1V1'
  100. 'N1_2'
  101. 'N1_3'
  102. 'N1_6U'
  103. 'N1_6TAKA'
  104. 'N1_7'
  105. 'N1_8'
  106. 'N1_9'
  107. 'N1_10'
  108. 'N1_11'
  109. 'N1_12'
  110. 'N1_13'
  111. 'N1_14'
  112. 'N1_15V1'
  113. 'N1_16'
  114. 'N1_17'
  115. 'O1_1'
  116. 'O1_2'
  117. 'O1_3'
  118. 'O1_4V1'
  119. 'O1_5'
  120. 'O1_6'
  121. 'G1_2S1'
  122. 'G1_3S1'
  123. 'G1_4S1'
  124. 'G1_5A1S1'
  125. 'G1_6S1'
  126. 'G1_2S2'
  127. 'G1_3S2'
  128. 'G1_4S2'
  129. 'G1_5A1S2'
  130. 'G1_6S2'
  131. 'G1_2S3'
  132. 'G1_3S3'
  133. 'G1_4S3'
  134. 'G1_5A1S3'
  135. 'G1_6S3'
  136. 'G1_2SIL1'
  137. 'G1_3SIL1'
  138. 'G1_4SIL1'
  139. 'G1_5A1SIL1'
  140. 'G1_6SIL1'
  141. 'G2_2S1'
  142. 'G2_3S1'
  143. 'G2_4S1'
  144. 'G2_5A1S1'
  145. 'G2_5B1S1'
  146. 'G2_6S1'
  147. 'G2_7S1'
  148. 'DISTRICT'
  149. 'D1A_10AF_3M'
  150. 'D1A_10AL_3M'
  151. 'D1C_10AF_3M'
  152. 'D1C_10AL_3M'
  153. 'A05'
  154. 'MOUZA_GRP'
dim(df_original)
  1. 23506
  2. 154

Out of all the columns, based on Dorato’s paper, and after going into a deep dive of what information each column was showcasing, we decided to specifically choose the columns below which were in synchronisation with the objective of the paper and in terms of the base logistic’s significant variables list.

Base Model: Logistic Regression

# subsetting the data
df1 <-df_original[, c("D1A_1V1L", "A08", "A12", "A13", "A14", "D1A_7ML", "A15V1", "A15V4", "D1A_4", "A11Y" ,"N1_6TAKA", "N1_12", "N1_13", "N1_14", "N1_16", "N1_17", "D1A_10AF_3M", "D1A_10AL_3M", "D1A_8F")]
df1$Age <- 2019 - df1$A08
df1 <-df1[, c("D1A_1V1L", "A12", "A13", "A14", "D1A_7ML", "A15V1", "A15V4", "D1A_4", "A11Y" ,"N1_6TAKA", "N1_12", "N1_13", "N1_14", "N1_16", "N1_17", "D1A_10AF_3M", "D1A_10AL_3M", "D1A_8F")]
# taking out D1A_6AL because too many destinations in list

df1 <-df1[, c("D1A_1V1L", "A12", "A13", "A14", "D1A_7ML", "A15V1", "A15V4", "D1A_4", "A11Y" ,"N1_6TAKA", "N1_12", "N1_13", "N1_14", "N1_16", "N1_17", "D1A_10AF_3M", "D1A_10AL_3M", "D1A_8F")]

df1 <- df1 %>% rename( "Work_Earn_Money" = D1A_1V1L, 
                       "Can_write_letter" = A12, 
                       "Education_Level" = A13, 
                       "Livelihood_Occupation" = A14,
                       "Month_Arrival" = D1A_7ML, 
                       "Migraton_Experience_Internal" = A15V1, 
                       "No_Migration_Experience" = A15V4, 
                       "Number_Trips" = D1A_4, 
                       "Age_First_Marriage" = A11Y,
                       "Paid_in_Taka" = N1_6TAKA, 
                       "Rent_per_Month" = N1_12, 
                       "Food_budget" = N1_13, 
                       "Monthly_Remittances" = N1_14, 
                       "Monthly_Savings" = N1_16, 
                       "Saving_brought_Home" = N1_17, 
                       "Wage_First_Head" = D1A_10AF_3M, 
                       "Wage_Last_Head"= D1A_10AL_3M, 
                       "Duration_of_stay" = D1A_8F)


#converting cateogorical variables to factor
#columns_to_factor <- c("D1A_1V1L", "A12", "A13", "A14", "D1A_7ML", "A15V1", "A15V4")
columns_to_factor <- c("Work_Earn_Money", "Can_write_letter", "Education_Level", "Livelihood_Occupation", "Month_Arrival", "Migraton_Experience_Internal", "No_Migration_Experience")
df1[columns_to_factor] <- lapply(df1[columns_to_factor], factor)

print(head(df1))
  Work_Earn_Money Can_write_letter Education_Level Livelihood_Occupation
1            <NA>                1               4                    14
2            <NA>             <NA>               2                  <NA>
3            <NA>             <NA>            <NA>                  <NA>
4            <NA>                2               2                    17
5            <NA>                1               4                    10
6            <NA>                2               4                    17
  Month_Arrival Migraton_Experience_Internal No_Migration_Experience
1          <NA>                         <NA>                       4
2          <NA>                         <NA>                       4
3          <NA>                         <NA>                       4
4          <NA>                         <NA>                       4
5          <NA>                         <NA>                    <NA>
6          <NA>                            1                    <NA>
  Number_Trips Age_First_Marriage Paid_in_Taka Rent_per_Month Food_budget
1           NA                 NA           NA             NA          NA
2           NA                 NA           NA             NA          NA
3           NA                 NA           NA             NA          NA
4           NA                 13           NA             NA          NA
5           NA                 19           NA             NA          NA
6           NA                 18           NA             NA          NA
  Monthly_Remittances Monthly_Savings Saving_brought_Home Wage_First_Head
1                  NA              NA                  NA              NA
2                  NA              NA                  NA              NA
3                  NA              NA                  NA              NA
4                  NA              NA                  NA              NA
5                  NA              NA                  NA              NA
6                  NA              NA                  NA              NA
  Wage_Last_Head Duration_of_stay
1             NA               NA
2             NA               NA
3             NA               NA
4             NA               NA
5             NA               NA
6             NA               NA
# converting variables to dummy variables
df1 <- fastDummies::dummy_cols(df1, remove_first_dummy = TRUE, remove_selected_columns = TRUE)
print(dim(df1))
print(head(df1))
[1] 23506    59
  Number_Trips Age_First_Marriage Paid_in_Taka Rent_per_Month Food_budget
1           NA                 NA           NA             NA          NA
2           NA                 NA           NA             NA          NA
3           NA                 NA           NA             NA          NA
4           NA                 13           NA             NA          NA
5           NA                 19           NA             NA          NA
6           NA                 18           NA             NA          NA
  Monthly_Remittances Monthly_Savings Saving_brought_Home Wage_First_Head
1                  NA              NA                  NA              NA
2                  NA              NA                  NA              NA
3                  NA              NA                  NA              NA
4                  NA              NA                  NA              NA
5                  NA              NA                  NA              NA
6                  NA              NA                  NA              NA
  Wage_Last_Head Duration_of_stay Work_Earn_Money_1 Work_Earn_Money_NA
1             NA               NA                NA                  1
2             NA               NA                NA                  1
3             NA               NA                NA                  1
4             NA               NA                NA                  1
5             NA               NA                NA                  1
6             NA               NA                NA                  1
  Can_write_letter_2 Can_write_letter_NA Education_Level_2 Education_Level_3
1                  0                   0                 0                 0
2                 NA                   1                 1                 0
3                 NA                   1                NA                NA
4                  1                   0                 1                 0
5                  0                   0                 0                 0
6                  1                   0                 0                 0
  Education_Level_4 Education_Level_5 Education_Level_6 Education_Level_7
1                 1                 0                 0                 0
2                 0                 0                 0                 0
3                NA                NA                NA                NA
4                 0                 0                 0                 0
5                 1                 0                 0                 0
6                 1                 0                 0                 0
  Education_Level_8 Education_Level_9 Education_Level_NA
1                 0                 0                  0
2                 0                 0                  0
3                NA                NA                  1
4                 0                 0                  0
5                 0                 0                  0
6                 0                 0                  0
  Livelihood_Occupation_2 Livelihood_Occupation_3 Livelihood_Occupation_4
1                       0                       0                       0
2                      NA                      NA                      NA
3                      NA                      NA                      NA
4                       0                       0                       0
5                       0                       0                       0
6                       0                       0                       0
  Livelihood_Occupation_5 Livelihood_Occupation_6 Livelihood_Occupation_7
1                       0                       0                       0
2                      NA                      NA                      NA
3                      NA                      NA                      NA
4                       0                       0                       0
5                       0                       0                       0
6                       0                       0                       0
  Livelihood_Occupation_8 Livelihood_Occupation_9 Livelihood_Occupation_10
1                       0                       0                        0
2                      NA                      NA                       NA
3                      NA                      NA                       NA
4                       0                       0                        0
5                       0                       0                        1
6                       0                       0                        0
  Livelihood_Occupation_11 Livelihood_Occupation_12 Livelihood_Occupation_13
1                        0                        0                        0
2                       NA                       NA                       NA
3                       NA                       NA                       NA
4                        0                        0                        0
5                        0                        0                        0
6                        0                        0                        0
  Livelihood_Occupation_14 Livelihood_Occupation_15 Livelihood_Occupation_16
1                        1                        0                        0
2                       NA                       NA                       NA
3                       NA                       NA                       NA
4                        0                        0                        0
5                        0                        0                        0
6                        0                        0                        0
  Livelihood_Occupation_17 Livelihood_Occupation_18 Livelihood_Occupation_19
1                        0                        0                        0
2                       NA                       NA                       NA
3                       NA                       NA                       NA
4                        1                        0                        0
5                        0                        0                        0
6                        1                        0                        0
  Livelihood_Occupation_99 Livelihood_Occupation_NA Month_Arrival_2
1                        0                        0              NA
2                       NA                        1              NA
3                       NA                        1              NA
4                        0                        0              NA
5                        0                        0              NA
6                        0                        0              NA
  Month_Arrival_3 Month_Arrival_4 Month_Arrival_5 Month_Arrival_6
1              NA              NA              NA              NA
2              NA              NA              NA              NA
3              NA              NA              NA              NA
4              NA              NA              NA              NA
5              NA              NA              NA              NA
6              NA              NA              NA              NA
  Month_Arrival_7 Month_Arrival_8 Month_Arrival_9 Month_Arrival_10
1              NA              NA              NA               NA
2              NA              NA              NA               NA
3              NA              NA              NA               NA
4              NA              NA              NA               NA
5              NA              NA              NA               NA
6              NA              NA              NA               NA
  Month_Arrival_11 Month_Arrival_12 Month_Arrival_98 Month_Arrival_NA
1               NA               NA               NA                1
2               NA               NA               NA                1
3               NA               NA               NA                1
4               NA               NA               NA                1
5               NA               NA               NA                1
6               NA               NA               NA                1
  Migraton_Experience_Internal_NA No_Migration_Experience_NA
1                               1                          0
2                               1                          0
3                               1                          0
4                               1                          0
5                               1                          1
6                               0                          1
# removing rows that do not have D1A_1V1L
df1 <- df1[!is.na(df1$Work_Earn_Money_1), ]

#replace NA with 0, 0 meaning "No", assuming that if the participant did not say yes to a question, than it would be no.
df1 <- replace(df1, is.na(df1), 0)
print(head(df1))
   Number_Trips Age_First_Marriage Paid_in_Taka Rent_per_Month Food_budget
11            1                 29        13000              0       12000
33            1                 21        39800              0        3000
38            4                 23         1000           7000       10000
43            1                 21         6000           1500        2000
50            1                 16            0              0           0
56            1                 26         6000           1000        2000
   Monthly_Remittances Monthly_Savings Saving_brought_Home Wage_First_Head
11                   0            1000                   0           18000
33                   0             300                   0           20000
38               10000            2000               30000               0
43                2000             500                   0            6000
50                   0               0                   0               0
56                2000            1000                4000            6000
   Wage_Last_Head Duration_of_stay Work_Earn_Money_1 Work_Earn_Money_NA
11          18000              997                 0                  0
33          20000              997                 0                  0
38              0              156                 1                  0
43           6000               60                 1                  0
50              0               84                 0                  0
56           6000               12                 1                  0
   Can_write_letter_2 Can_write_letter_NA Education_Level_2 Education_Level_3
11                  1                   0                 1                 0
33                  1                   0                 0                 0
38                  1                   0                 1                 0
43                  1                   0                 1                 0
50                  1                   0                 0                 0
56                  0                   0                 0                 1
   Education_Level_4 Education_Level_5 Education_Level_6 Education_Level_7
11                 0                 0                 0                 0
33                 0                 0                 0                 0
38                 0                 0                 0                 0
43                 0                 0                 0                 0
50                 0                 0                 0                 0
56                 0                 0                 0                 0
   Education_Level_8 Education_Level_9 Education_Level_NA
11                 0                 0                  0
33                 0                 0                  0
38                 0                 0                  0
43                 0                 0                  0
50                 0                 0                  0
56                 0                 0                  0
   Livelihood_Occupation_2 Livelihood_Occupation_3 Livelihood_Occupation_4
11                       1                       0                       0
33                       1                       0                       0
38                       0                       0                       0
43                       0                       0                       1
50                       0                       1                       0
56                       0                       0                       1
   Livelihood_Occupation_5 Livelihood_Occupation_6 Livelihood_Occupation_7
11                       0                       0                       0
33                       0                       0                       0
38                       0                       0                       0
43                       0                       0                       0
50                       0                       0                       0
56                       0                       0                       0
   Livelihood_Occupation_8 Livelihood_Occupation_9 Livelihood_Occupation_10
11                       0                       0                        0
33                       0                       0                        0
38                       0                       0                        0
43                       0                       0                        0
50                       0                       0                        0
56                       0                       0                        0
   Livelihood_Occupation_11 Livelihood_Occupation_12 Livelihood_Occupation_13
11                        0                        0                        0
33                        0                        0                        0
38                        0                        0                        1
43                        0                        0                        0
50                        0                        0                        0
56                        0                        0                        0
   Livelihood_Occupation_14 Livelihood_Occupation_15 Livelihood_Occupation_16
11                        0                        0                        0
33                        0                        0                        0
38                        0                        0                        0
43                        0                        0                        0
50                        0                        0                        0
56                        0                        0                        0
   Livelihood_Occupation_17 Livelihood_Occupation_18 Livelihood_Occupation_19
11                        0                        0                        0
33                        0                        0                        0
38                        0                        0                        0
43                        0                        0                        0
50                        0                        0                        0
56                        0                        0                        0
   Livelihood_Occupation_99 Livelihood_Occupation_NA Month_Arrival_2
11                        0                        0               0
33                        0                        0               0
38                        0                        0               0
43                        0                        0               0
50                        0                        0               0
56                        0                        0               0
   Month_Arrival_3 Month_Arrival_4 Month_Arrival_5 Month_Arrival_6
11               0               0               0               0
33               0               0               0               0
38               0               0               0               0
43               0               0               0               0
50               0               0               0               0
56               0               0               0               0
   Month_Arrival_7 Month_Arrival_8 Month_Arrival_9 Month_Arrival_10
11               0               0               0                0
33               0               0               0                0
38               0               0               0                0
43               0               0               0                0
50               0               0               0                0
56               0               0               0                0
   Month_Arrival_11 Month_Arrival_12 Month_Arrival_98 Month_Arrival_NA
11                0                0                1                0
33                0                0                1                0
38                0                0                0                0
43                0                0                1                0
50                0                0                1                0
56                0                0                1                0
   Migraton_Experience_Internal_NA No_Migration_Experience_NA
11                               0                          1
33                               0                          1
38                               0                          1
43                               0                          1
50                               0                          1
56                               0                          1
# partitioning the data 
set.seed(123) # for reproducibility
#train_idx <- createDataPartition(df1$D1A_1V1L, p = 0.8, list = FALSE)
train_idx <- createDataPartition(df1$Work_Earn_Money_1, p = 0.8, list = FALSE)
train <- df1[train_idx,]
test <- df1[-train_idx,] 
#trying a logistic regression
model <- glm(Work_Earn_Money_1 ~ ., data = df1, family = binomial(link='logit'))

# look at summary of logistic regression model
(summary2_lm <- summary(model))

Call:
glm(formula = Work_Earn_Money_1 ~ ., family = binomial(link = "logit"), 
    data = df1)

Coefficients: (6 not defined because of singularities)
                                  Estimate Std. Error z value Pr(>|z|)    
(Intercept)                     -1.108e+01  6.132e+02  -0.018 0.985581    
Number_Trips                     2.102e-01  8.233e-02   2.553 0.010670 *  
Age_First_Marriage              -1.188e-02  1.574e-02  -0.755 0.450284    
Paid_in_Taka                     4.180e-06  2.971e-06   1.407 0.159556    
Rent_per_Month                   1.868e-04  6.995e-05   2.670 0.007590 ** 
Food_budget                     -7.234e-06  3.027e-05  -0.239 0.811126    
Monthly_Remittances              3.018e-04  4.956e-05   6.089 1.13e-09 ***
Monthly_Savings                  6.897e-05  4.061e-05   1.698 0.089426 .  
Saving_brought_Home              1.342e-06  3.328e-06   0.403 0.686694    
Wage_First_Head                 -5.990e-05  3.420e-05  -1.751 0.079872 .  
Wage_Last_Head                   1.371e-04  3.409e-05   4.021 5.80e-05 ***
Duration_of_stay                -1.485e-03  2.789e-04  -5.326 1.01e-07 ***
Work_Earn_Money_NA                      NA         NA      NA       NA    
Can_write_letter_2               3.758e-01  3.685e-01   1.020 0.307896    
Can_write_letter_NA                     NA         NA      NA       NA    
Education_Level_2               -5.993e-01  3.622e-01  -1.655 0.097935 .  
Education_Level_3               -1.055e+00  4.906e-01  -2.150 0.031541 *  
Education_Level_4               -8.061e-01  4.968e-01  -1.622 0.104709    
Education_Level_5               -1.523e+00  5.347e-01  -2.849 0.004385 ** 
Education_Level_6               -1.947e+00  6.565e-01  -2.965 0.003025 ** 
Education_Level_7               -1.826e+00  5.520e-01  -3.309 0.000936 ***
Education_Level_8               -2.470e+00  5.359e-01  -4.609 4.04e-06 ***
Education_Level_9                1.081e+01  8.827e+02   0.012 0.990231    
Education_Level_NA                      NA         NA      NA       NA    
Livelihood_Occupation_2          1.045e+00  6.869e-01   1.521 0.128282    
Livelihood_Occupation_3          1.020e+00  8.858e-01   1.152 0.249324    
Livelihood_Occupation_4          1.825e+00  1.222e+00   1.493 0.135354    
Livelihood_Occupation_5          3.996e-02  9.724e-01   0.041 0.967220    
Livelihood_Occupation_6         -1.519e+00  1.494e+00  -1.017 0.308988    
Livelihood_Occupation_7          1.500e+00  7.491e-01   2.003 0.045199 *  
Livelihood_Occupation_8         -1.623e+00  9.062e-01  -1.792 0.073212 .  
Livelihood_Occupation_9          2.427e+00  8.715e-01   2.785 0.005354 ** 
Livelihood_Occupation_10         9.955e-01  6.808e-01   1.462 0.143667    
Livelihood_Occupation_11         7.487e-01  6.764e-01   1.107 0.268302    
Livelihood_Occupation_12        -3.007e-01  7.075e-01  -0.425 0.670858    
Livelihood_Occupation_13         8.573e-01  6.629e-01   1.293 0.195943    
Livelihood_Occupation_14         6.300e-01  1.027e+00   0.613 0.539726    
Livelihood_Occupation_15         1.257e+00  7.156e-01   1.756 0.079036 .  
Livelihood_Occupation_16         1.174e+00  9.175e-01   1.280 0.200666    
Livelihood_Occupation_17        -2.769e+00  6.844e-01  -4.046 5.21e-05 ***
Livelihood_Occupation_18         8.129e-01  8.976e-01   0.906 0.365155    
Livelihood_Occupation_19         6.192e-01  8.443e-01   0.733 0.463344    
Livelihood_Occupation_99                NA         NA      NA       NA    
Livelihood_Occupation_NA                NA         NA      NA       NA    
Month_Arrival_2                 -9.650e-02  5.036e-01  -0.192 0.848045    
Month_Arrival_3                 -8.094e-01  5.129e-01  -1.578 0.114536    
Month_Arrival_4                 -3.629e-01  6.851e-01  -0.530 0.596311    
Month_Arrival_5                 -1.054e+00  5.900e-01  -1.787 0.073955 .  
Month_Arrival_6                 -1.516e-01  5.567e-01  -0.272 0.785452    
Month_Arrival_7                  6.641e-01  7.465e-01   0.890 0.373677    
Month_Arrival_8                  1.228e-01  7.708e-01   0.159 0.873387    
Month_Arrival_9                  1.004e+00  7.811e-01   1.286 0.198474    
Month_Arrival_10                -1.708e+00  8.719e-01  -1.959 0.050060 .  
Month_Arrival_11                 2.070e-01  8.759e-01   0.236 0.813144    
Month_Arrival_12                -5.850e-01  7.528e-01  -0.777 0.437093    
Month_Arrival_98                -5.213e-01  3.736e-01  -1.395 0.162918    
Month_Arrival_NA                        NA         NA      NA       NA    
Migraton_Experience_Internal_NA  2.354e+01  1.075e+03   0.022 0.982529    
No_Migration_Experience_NA       1.308e+01  6.132e+02   0.021 0.982988    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1745.4  on 1999  degrees of freedom
Residual deviance: 1028.3  on 1947  degrees of freedom
AIC: 1134.3

Number of Fisher Scoring iterations: 13

Though we focused on subsetting the data in the inital go by looking at the data description, we used logistic regression on the subsetted data to find the significant variables in comparison and used those.

# select the features that are significant for the baseline model and other models

df_model <- df1[ , c('Work_Earn_Money_1', 'Number_Trips', 'Rent_per_Month', 'Monthly_Remittances', 'Wage_Last_Head', 'Duration_of_stay', 'Education_Level_3', 'Education_Level_5', 'Education_Level_6', 'Education_Level_7', 'Education_Level_8', 'Livelihood_Occupation_7', 'Livelihood_Occupation_9', 'Livelihood_Occupation_17')]

set.seed(123) # for reproducibility
train_idx <- createDataPartition(df_model$Work_Earn_Money_1, p = 0.8, list = FALSE)
train <- df_model[train_idx,]
test <- df_model[-train_idx,]
# Create model with predictors found from linear model
#model2 <- glm(Work_Earn_Money_1 ~ D1A_4 + N1_12 + N1_14 + D1A_10AL_3M + D1A_8F + A13_3 + A13_5 + 
                #A13_6 + A13_7 + A13_8 + A14_7 + A14_9 + A14_17, data = train, family = binomial)

# Create model with predictors found from linear model
model2 <- glm(Work_Earn_Money_1 ~ ., data = train, family = binomial(link='logit'))



#model2 <- glm(Work_Earn_Money_1 ~ D1A_4 + N1_12 + N1_14 + D1A_10AL_3M + D1A_8F + A13_3 + A13_5 + 
                #A13_6 + A13_7 + A13_8 + A14_7 + A14_9 + A14_17, data = train, family = binomial)


# predictions on test data
predictions <- predict(model2, newdata = test, type = "response")

# convert probabilities to class labels, assign greater than 0.5 to Positive
pred_classes <- ifelse(predictions > 0.5, "Positive", "Negative")

# confusion matrix
cm <- table(Actual = test$Work_Earn_Money_1, Predicted = pred_classes)

# Confusion Matrix
print(cm)

roc_curve_lr <- roc(test$Work_Earn_Money_1, predictions, legacy.axes = FALSE)

# Plot ROC curve
plot(roc_curve_lr, main = "ROC Curve", col = "blue",  legacy.axes = T) 


# Save AUC
auc_lr <- round(auc(roc_curve_lr), 2)
      Predicted
Actual Negative Positive
     0       34       31
     1       10      325
Setting levels: control = 0, case = 1

Setting direction: controls < cases

# Calculate True Positives (TP), True Negatives (TN), False Positives (FP), False Negatives (FN)


TP <- 325
TN <- 34
FP <- 41
FN <- 10


# Calculate accuracy
accuracy <- (TP + TN) / sum(cm)

# Calculate precision
precision <- TP / (TP + FP)

# Calculate recall (also called sensitivity)
recall <- TP / (TP + FN)

# Calculate F1 score
F1 <- 2 * (precision * recall) / (precision + recall)

#Specificity
# Calculate specificity
specificity <- (TN / (TN + FP))

# Calculate AIC
aic <- AIC(model2)

# Calculate BIC
bic <- BIC(model2)

# Print AIC and BIC
print(paste("AIC:", aic))
print(paste("BIC:", bic))

# Print the metrics
cat("Accuracy:", accuracy, "\n")
cat("Precision:", precision, "\n")
cat("Recall:", recall, "\n")
cat("F1 Score:", F1, "\n")
print(paste("Specificity:", specificity))
[1] "AIC: 918.394533143904"
[1] "BIC: 993.683157859094"
Accuracy: 0.8975 
Precision: 0.8879781 
Recall: 0.9701493 
F1 Score: 0.9272468 
[1] "Specificity: 0.453333333333333"

Feature Selection/Interesting Findings

Final Conclusions

The following are features are predictors of D1A_1V1L: Internal: Primary purpose of trip: work/earn money - Last, Head.

D1A_4 2.102e-01 8.233e-02 2.553 0.010670 - D1A_4: Internal: Total number of trips - Head

N1_12 1.868e-04 6.995e-05 2.670 0.007590 - N1_12: Internal: Rent per month

N1_14 3.018e-04 4.956e-05 6.089 1.13e-09 *** -> N1_14: Internal: Average monthly remittances sent home

D1A_10AL_3M 1.371e-04 3.409e-05 4.021 5.80e-05*** -> D1A_10AL_3M: Internal: Wage(taka)- Last, Head Monthly

D1A_8F -1.485e-03 2.789e-04 -5.326 1.01e-07 *** -> D1A_8F: Internal: Duration of stay - First, Head

A13_3 -1.055e+00 4.906e-01 -2.150 0.031541*** **-> A13: Household: Level of education (Highest level passed) - 3 Class V (Complete PE)

A13_5 -1.523e+00 5.347e-01 -2.849 0.004385** -> A13: Household: Level of education (Highest level passed) - 5 SSC (Complete SE)

A13_6 -1.947e+00 6.565e-01 -2.965 0.003025 -> A13: Household: Level of education (Highest level passed) - 6 College (11 and 12 grades)

A13_7 -1.826e+00 5.520e-01 -3.309 0.000936 -> A13: Household: Level of education (Highest level passed) - 7 HSC (complete HSE)

A13_8 -2.470e+00 5.359e-01 -4.609 4.04e-06 -> A13: Household: Level of education (Highest level passed) - 8 University level

A14_7 1.500e+00 7.491e-01 2.003 0.045199 -> A14: Household: Livelihood/occupation - 7 Rickshaw driver/ Brick breaking/Road building/Construction worker/boatman/earth

A14_9 2.427e+00 8.715e-01 2.785 0.005354 -> 9 Non agricultural worker(factory worker, blue collar service)

A14_17 -2.769e+00 6.844e-01 -4.046 5.21e-05 -> 17 Homemaker

Note about AIC and BIC for Ridge, Lasso and Elastic Net

AIC and BIC are best suited for models where parameters are estimated via likelihood methods, typically in more traditional statistical models like linear and logistic regression models. For models incorporating regularization (like Ridge, Lasso and Elastic Net) or non-parametric models (like Decision Trees and Random Forests), these criteria are generally not applicable unless approximations or modifications to the original criteria are used. Hence, although we decided to use this as a method of comparison, it will lead to biased outcome.

Ridge

# prepare x and y
x <- model.matrix(Work_Earn_Money_1 ~ ., df_model)[, ]
y <- df_model$Work_Earn_Money_1

# split data into train and test sets
set.seed(1)
train <- sample(c(TRUE, FALSE), nrow(df_model), replace = TRUE, prob = c(0.8, 0.2))

test <- !train
y.test <- y[test]

# Ridge regression model
ridge.mod <- glmnet(x[train, ], y[train], alpha = 0, lambda.min.ratio = 0.000001)

# Cross-validation for selecting lambda
cv.out <- cv.glmnet(x[train, ], y[train], alpha = 0, lambda.min.ratio = 0.000001)
bestlam <- cv.out$lambda.min

# Predict the test data with the best lambda
ridge.pred <- predict(ridge.mod, s = bestlam, newx = x[test, ])

# Assuming predictions are probabilities, convert them to binary classes
predicted_classes <- ifelse(ridge.pred > 0.5, 1, 0)

# Confusion matrix
conf_matrix <- table(predicted_classes, y.test)

# Calculate Sensitivity (True Positive Rate)
sensitivity <- conf_matrix[2, 2] / sum(conf_matrix[2, ])

# Calculate Specificity (True Negative Rate)
specificity <- conf_matrix[1, 1] / sum(conf_matrix[1, ])

# Calculate Accuracy
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)

# Calculate Precision (Positive Predictive Value)
precision <- conf_matrix[2, 2] / sum(conf_matrix[, 2])

# Calculate Recall (Same as Sensitivity)
recall <- sensitivity

# Calculate F1
F1 <- 2 * (precision * recall) / (precision + recall)

# Calculate False Positive Rate
fpr <- 1 - specificity

# Create ROC Curve
roc_curve_ridge <- roc(y.test, ridge.pred)

# Calculate AUC (Area Under Curve)
auc_value_ridge <- auc(roc_curve_ridge)
roc_curve <- roc(y.test, ridge.pred)

# Calculate AUC (Area Under Curve)
auc_value <- auc(roc_curve)

# Print the metrics
print("Metrics:")
print(paste("Sensitivity (True Positive Rate):", sensitivity))
print(paste("Specificity (True Negative Rate):", specificity))
print(paste("Accuracy:", accuracy))
print(paste("Precision (Positive Predictive Value):", precision))
print(paste("Recall:", recall))
print(paste("False Positive Rate:", fpr))
print(paste("F1:", F1))

print(paste("AUC (Area Under Curve):", auc_value_ridge))

# Plot ROC curve
plot(roc_curve_ridge, main = "ROC Curve", col = "blue")
print(paste("AUC (Area Under Curve):", auc_value))

# Plot ROC curve
plot(roc_curve, main = "ROC Curve", col = "blue")

coef(ridge.mod)
Setting levels: control = 0, case = 1

Warning message in roc.default(y.test, ridge.pred):
"Deprecated use a matrix as predictor. Unexpected results may be produced, please pass a numeric vector."
Setting direction: controls < cases

Setting levels: control = 0, case = 1

Warning message in roc.default(y.test, ridge.pred):
"Deprecated use a matrix as predictor. Unexpected results may be produced, please pass a numeric vector."
Setting direction: controls < cases

  [[ suppressing 84 column names 's0', 's1', 's2' ... ]]
[1] "Metrics:"
[1] "Sensitivity (True Positive Rate): 0.917808219178082"
[1] "Specificity (True Negative Rate): 0.674418604651163"
[1] "Accuracy: 0.892156862745098"
[1] "Precision (Positive Predictive Value): 0.959885386819484"
[1] "Recall: 0.917808219178082"
[1] "False Positive Rate: 0.325581395348837"
[1] "F1: 0.938375350140056"
[1] "AUC (Area Under Curve): 0.886066728182216"
[1] "AUC (Area Under Curve): 0.886066728182216"
15 x 84 sparse Matrix of class "dgCMatrix"
                                                                  
(Intercept)               8.385678e-01  8.382639e-01  8.382188e-01
(Intercept)               .             .             .           
Number_Trips              2.324851e-38  5.618277e-05  6.454707e-05
Rent_per_Month            2.308136e-41  5.578948e-08  6.409709e-08
Monthly_Remittances       2.880487e-41  6.968164e-08  8.006785e-08
Wage_Last_Head            7.445401e-42  1.799846e-08  2.067902e-08
Duration_of_stay         -9.695762e-41 -2.351791e-07 -2.703411e-07
Education_Level_3        -5.997023e-38 -1.449509e-04 -1.665353e-04
Education_Level_5         3.124446e-39  7.079255e-06  8.052202e-06
Education_Level_6        -1.557239e-37 -3.772600e-04 -4.335859e-04
Education_Level_7        -6.587286e-38 -1.601058e-04 -1.840992e-04
Education_Level_8        -6.543591e-38 -1.594983e-04 -1.834782e-04
Livelihood_Occupation_7   1.248436e-37  3.025307e-04  3.477130e-04
Livelihood_Occupation_9   1.509607e-37  3.655498e-04  4.200978e-04
Livelihood_Occupation_17 -6.955422e-37 -1.684579e-03 -1.936013e-03
                                                                  
(Intercept)               8.381670e-01  8.381075e-01  8.380394e-01
(Intercept)               .             .             .           
Number_Trips              7.414820e-05  8.516631e-05  9.780698e-05
Rent_per_Month            7.363373e-08  8.457863e-08  9.713638e-08
Monthly_Remittances       9.199374e-08  1.056849e-07  1.213990e-07
Wage_Last_Head            2.375626e-08  2.728808e-08  3.134058e-08
Duration_of_stay         -3.107503e-07 -3.571866e-07 -4.105445e-07
Education_Level_3        -1.913127e-04 -2.197490e-04 -2.523755e-04
Education_Level_5         9.142965e-06  1.036035e-05  1.171166e-05
Education_Level_6        -4.982921e-04 -5.726159e-04 -6.579746e-04
Education_Level_7        -2.116911e-04 -2.434219e-04 -2.799135e-04
Education_Level_8        -2.110791e-04 -2.428528e-04 -2.794364e-04
Livelihood_Occupation_7   3.996219e-04  4.592517e-04  5.277420e-04
Livelihood_Occupation_9   4.827519e-04  5.547061e-04  6.373264e-04
Livelihood_Occupation_17 -2.224830e-03 -2.556542e-03 -2.937459e-03
                                                                  
(Intercept)               8.379613e-01  8.378718e-01  8.377694e-01
(Intercept)               .             .             .           
Number_Trips              1.123045e-04  1.289256e-04  1.479730e-04
Rent_per_Month            1.115402e-07  1.280556e-07  1.469845e-07
Monthly_Remittances       1.394304e-07  1.601147e-07  1.838340e-07
Wage_Last_Head            3.598910e-08  4.131949e-08  4.742932e-08
Duration_of_stay         -4.718502e-07 -5.422800e-07 -6.231822e-07
Education_Level_3        -2.897982e-04 -3.327068e-04 -3.818857e-04
Education_Level_5         1.320164e-05  1.483072e-05  1.659369e-05
Education_Level_6        -7.559901e-04 -8.685180e-04 -9.976784e-04
Education_Level_7        -3.218815e-04 -3.701499e-04 -4.256664e-04
Education_Level_8        -3.215663e-04 -3.700946e-04 -4.260066e-04
Livelihood_Occupation_7   6.063972e-04  6.967100e-04  8.003872e-04
Livelihood_Occupation_9   7.321754e-04  8.410382e-04  9.659526e-04
Livelihood_Occupation_17 -3.374800e-03 -3.876816e-03 -4.452932e-03
                                                                  
(Intercept)               8.376521e-01  8.375181e-01  8.373650e-01
(Intercept)               .             .             .           
Number_Trips              1.697904e-04  1.947667e-04  2.233412e-04
Rent_per_Month            1.686694e-07  1.934984e-07  2.219101e-07
Monthly_Remittances       2.110231e-07  2.421757e-07  2.778512e-07
Wage_Last_Head            5.442941e-08  6.244532e-08  7.161902e-08
Duration_of_stay         -7.161008e-07 -8.228034e-07 -9.453120e-07
Education_Level_3        -4.382250e-04 -5.027330e-04 -5.765493e-04
Education_Level_5         1.847582e-05  2.044968e-05  2.246978e-05
Education_Level_6        -1.145893e-03 -1.315923e-03 -1.510916e-03
Education_Level_7        -4.895225e-04 -5.629745e-04 -6.474690e-04
Education_Level_8        -4.904442e-04 -5.647308e-04 -6.504018e-04
Livelihood_Occupation_7   9.193786e-04  1.055909e-03  1.212516e-03
Livelihood_Occupation_9   1.109243e-03  1.273555e-03  1.461899e-03
Livelihood_Occupation_17 -5.113903e-03 -5.871986e-03 -6.741130e-03
                                                                  
(Intercept)               8.371903e-01  8.369911e-01  8.367642e-01
(Intercept)               .             .             .           
Number_Trips              2.560083e-04  2.933232e-04  3.359064e-04
Rent_per_Month            2.543988e-07  2.915202e-07  3.338963e-07
Monthly_Remittances       3.186824e-07  3.653831e-07  4.187560e-07
Wage_Last_Head            8.211062e-08  9.410019e-08  1.077895e-07
Duration_of_stay         -1.085938e-06 -1.247320e-06 -1.432469e-06
Education_Level_3        -6.609585e-04 -7.574046e-04 -8.675050e-04
Education_Level_5         2.446580e-05  2.633352e-05  2.792301e-05
Education_Level_6        -1.734453e-03 -1.990601e-03 -2.283975e-03
Education_Level_7        -7.446717e-04 -8.565008e-04 -9.851654e-04
Education_Level_8        -7.492403e-04 -8.633189e-04 -9.950490e-04
Livelihood_Occupation_7   1.392087e-03  1.597903e-03  1.833689e-03
Livelihood_Occupation_9   1.677691e-03  1.924803e-03  2.207612e-03
Livelihood_Occupation_17 -7.737191e-03 -8.878155e-03 -1.018439e-02
                                                                  
(Intercept)               8.365063e-01  8.362135e-01  8.358817e-01
(Intercept)               .             .             .           
Number_Trips              3.844490e-04  4.397164e-04  5.025515e-04
Rent_per_Month            3.822210e-07  4.372649e-07  4.998784e-07
Monthly_Remittances       4.797010e-07  5.492227e-07  6.284378e-07
Wage_Last_Head            1.234038e-07  1.411934e-07  1.614351e-07
Duration_of_stay         -1.644815e-06 -1.888260e-06 -2.167237e-06
Education_Level_3        -9.930641e-04 -1.136086e-03 -1.298786e-03
Education_Level_5         2.902318e-05  2.934181e-05  2.847990e-05
Education_Level_6        -2.619795e-03 -3.003957e-03 -3.443098e-03
Education_Level_7        -1.133209e-03 -1.303562e-03 -1.499597e-03
Education_Level_8        -1.147239e-03 -1.323161e-03 -1.526633e-03
Livelihood_Occupation_7   2.103659e-03  2.412573e-03  2.765789e-03
Livelihood_Occupation_9   2.531052e-03  2.900668e-03  3.322673e-03
Livelihood_Occupation_17 -1.167890e-02 -1.338759e-02 -1.533958e-02
                                                                  
(Intercept)               8.355065e-01  8.350834e-01  8.346074e-01
(Intercept)               .             .             .           
Number_Trips              5.738760e-04  6.546891e-04  7.460643e-04
Rent_per_Month            5.709950e-07  6.516310e-07  7.428841e-07
Monthly_Remittances       7.185805e-07  8.210069e-07  9.371954e-07
Wage_Last_Head            1.844329e-07  2.105184e-07  2.400507e-07
Duration_of_stay         -2.486767e-06 -2.852532e-06 -3.270935e-06
Education_Level_3        -1.483594e-03 -1.693163e-03 -1.930360e-03
Education_Level_5         2.589893e-05  2.087948e-05  1.246938e-05
Education_Level_6        -3.944672e-03 -4.517014e-03 -5.169411e-03
Education_Level_7        -1.725196e-03 -1.984826e-03 -2.283622e-03
Education_Level_8        -1.762109e-03 -2.034791e-03 -2.350748e-03
Livelihood_Occupation_7   3.169318e-03  3.629872e-03  4.154914e-03
Livelihood_Occupation_9   3.803994e-03  4.352318e-03  4.976126e-03
Livelihood_Occupation_17 -1.756743e-02 -2.010744e-02 -2.299987e-02
                                                                  
(Intercept)               8.340737e-01  8.334775e-01  8.328141e-01
(Intercept)               .             .             .           
Number_Trips              8.491404e-04  9.651092e-04  1.095196e-03
Rent_per_Month            8.459274e-07  9.619999e-07  1.092391e-06
Monthly_Remittances       1.068745e-06  1.217364e-06  1.384861e-06
Wage_Last_Head            2.734140e-07  3.110158e-07  3.532815e-07
Duration_of_stay         -3.749177e-06 -4.295317e-06 -4.918340e-06
Education_Level_3        -2.198259e-03 -2.500116e-03 -2.839338e-03
Education_Level_5        -5.805894e-07 -1.989550e-05 -4.757148e-05
Education_Level_6        -5.912160e-03 -6.756619e-03 -7.715236e-03
Education_Level_7        -2.627483e-03 -3.023177e-03 -3.478451e-03
Education_Level_8        -2.717063e-03 -3.141988e-03 -3.635126e-03
Livelihood_Occupation_7   4.752689e-03  5.432238e-03  6.203392e-03
Livelihood_Occupation_9   5.684710e-03  6.488166e-03  7.397354e-03
Livelihood_Occupation_17 -2.628910e-02 -3.002378e-02 -3.425681e-02
                                                                  
(Intercept)               8.320794e-01  8.312701e-01  8.303839e-01
(Intercept)               .             .             .           
Number_Trips              1.240633e-03  1.402626e-03  1.582307e-03
Rent_per_Month            1.238421e-06  1.401411e-06  1.582649e-06
Monthly_Remittances       1.573116e-06  1.784050e-06  2.019579e-06
Wage_Last_Head            4.006485e-07  4.535576e-07  5.124421e-07
Duration_of_stay         -5.628205e-06 -6.435882e-06 -7.353364e-06
Education_Level_3        -3.219437e-03 -3.643968e-03 -4.116456e-03
Education_Level_5        -8.628902e-05 -1.394453e-04 -2.113051e-04
Education_Level_6        -8.801558e-03 -1.003021e-02 -1.141681e-02
Education_Level_7        -4.002158e-03 -4.604380e-03 -5.296558e-03
Education_Level_8        -4.207619e-03 -4.872348e-03 -5.644136e-03
Livelihood_Occupation_7   7.076724e-03  8.063454e-03  9.175292e-03
Livelihood_Occupation_9   8.423820e-03  9.579651e-03  1.087728e-02
Livelihood_Occupation_17 -3.904521e-02 -4.444976e-02 -5.053450e-02
                                                                  
(Intercept)               8.294200e-01  8.283799e-01  8.272672e-01
(Intercept)               .             .             .           
Number_Trips              1.780688e-03  1.998595e-03  2.236604e-03
Rent_per_Month            1.783347e-06  2.004596e-06  2.247304e-06
Monthly_Remittances       2.281557e-06  2.571703e-06  2.891515e-06
Wage_Last_Head            5.777151e-07  6.497546e-07  7.288867e-07
Duration_of_stay         -8.393637e-06 -9.570611e-06 -1.089899e-05
Education_Level_3        -4.640307e-03 -5.218709e-03 -5.854533e-03
Education_Level_5        -3.071692e-04 -4.335539e-04 -5.983771e-04
Education_Level_6        -1.297790e-02 -1.473070e-02 -1.669293e-02
Education_Level_7        -6.091607e-03 -7.004027e-03 -8.049968e-03
Education_Level_8        -6.539936e-03 -7.578984e-03 -8.782908e-03
Livelihood_Occupation_7   1.042420e-02  1.182204e-02  1.338014e-02
Livelihood_Occupation_9   1.232916e-02  1.394744e-02  1.574340e-02
Livelihood_Occupation_17 -5.736584e-02 -6.501138e-02 -7.353840e-02
                                                                  
(Intercept)               8.260886e-01  8.248543e-01  8.235783e-01
(Intercept)               .             .             .           
Number_Trips              2.494967e-03  2.773544e-03  3.071736e-03
Rent_per_Month            2.512148e-06  2.799511e-06  3.109435e-06
Monthly_Remittances       3.242172e-06  3.624424e-06  4.038477e-06
Wage_Last_Head            8.153700e-07  9.093783e-07  1.010988e-06
Duration_of_stay         -1.239404e-05 -1.407132e-05 -1.594626e-05
Education_Level_3        -6.550234e-03 -7.307763e-03 -8.128506e-03
Education_Level_5        -8.111367e-04 -1.083067e-03 -1.427256e-03
Education_Level_6        -1.888243e-02 -2.131679e-02 -2.401283e-02
Education_Level_7        -9.247255e-03 -1.061535e-02 -1.217521e-02
Education_Level_8        -1.017574e-02 -1.178381e-02 -1.363551e-02
Livelihood_Occupation_7   1.510869e-02  1.701605e-02  1.910787e-02
Livelihood_Occupation_9   1.772686e-02  1.990546e-02  2.228385e-02
Livelihood_Occupation_17 -8.301185e-02 -9.349197e-02 -1.050315e-01
                                                                  
(Intercept)               8.222783e-01  8.209765e-01  8.196987e-01
(Intercept)               .             .             .           
Number_Trips              3.388496e-03  3.722086e-03  4.070348e-03
Rent_per_Month            3.441586e-06  3.795176e-06  4.169010e-06
Monthly_Remittances       4.483872e-06  4.959404e-06  5.463007e-06
Wage_Last_Head            1.120185e-06  1.236801e-06  1.360599e-06
Duration_of_stay         -1.803363e-05 -2.034691e-05 -2.289758e-05
Education_Level_3        -9.013575e-03 -9.962711e-03 -1.097583e-02
Education_Level_5        -1.859023e-03 -2.394735e-03 -3.053170e-03
Education_Level_6        -2.698629e-02 -3.025033e-02 -3.381597e-02
Education_Level_7        -1.394931e-02 -1.596030e-02 -1.823170e-02
Education_Level_8        -1.576093e-02 -1.819083e-02 -2.095624e-02
Livelihood_Occupation_7   2.138612e-02  2.384813e-02  2.648553e-02
Livelihood_Occupation_9   2.486278e-02  2.763817e-02  3.060031e-02
Livelihood_Occupation_17 -1.176725e-01 -1.314432e-01 -1.463542e-01
                                                                  
(Intercept)               8.184738e-01  8.173334e-01  8.163102e-01
(Intercept)               .             .             .           
Number_Trips              4.430562e-03  4.799548e-03  5.173756e-03
Rent_per_Month            4.561451e-06  4.970439e-06  5.393523e-06
Monthly_Remittances       5.991716e-06  6.541652e-06  7.108055e-06
Wage_Last_Head            1.491230e-06  1.628272e-06  1.771247e-06
Duration_of_stay         -2.569428e-05 -2.874195e-05 -3.204101e-05
Education_Level_3        -1.205221e-02 -1.319086e-02 -1.439058e-02
Education_Level_5        -3.854295e-03 -4.819005e-03 -5.968482e-03
Education_Level_6        -3.769072e-02 -4.187804e-02 -4.637667e-02
Education_Level_7        -2.078642e-02 -2.364604e-02 -2.682969e-02
Education_Level_8        -2.408702e-02 -2.761060e-02 -3.155052e-02
Livelihood_Occupation_7   2.928336e-02  3.221945e-02  3.526409e-02
Livelihood_Occupation_9   3.373316e-02  3.701388e-02  4.041273e-02
Livelihood_Occupation_17 -1.623953e-01 -1.795328e-01 -1.977071e-01
                                                                  
(Intercept)               8.154369e-01  8.147444e-01  8.142602e-01
(Intercept)               .             .             .           
Number_Trips              5.549395e-03  5.922573e-03  6.289457e-03
Rent_per_Month            5.827899e-06  6.270454e-06  6.717814e-06
Monthly_Remittances       7.685380e-06  8.267449e-06  8.847650e-06
Wage_Last_Head            1.919655e-06  2.072999e-06  2.230812e-06
Duration_of_stay         -3.558661e-05 -3.936805e-05 -4.336845e-05
Education_Level_3        -1.565014e-02 -1.696830e-02 -1.834379e-02
Education_Level_5        -7.323377e-03 -8.902840e-03 -1.072344e-02
Education_Level_6        -5.118012e-02 -5.627630e-02 -6.164724e-02
Education_Level_7        -3.035295e-02 -3.422656e-02 -3.845524e-02
Education_Level_8        -3.592483e-02 -4.074461e-02 -4.601253e-02
Livelihood_Occupation_7   3.838026e-02  4.152443e-02  4.464801e-02
Livelihood_Occupation_9   4.389352e-02  4.741440e-02  5.092934e-02
Livelihood_Occupation_17 -2.168314e-01 -2.367921e-01 -2.574497e-01
                                                                  
(Intercept)               8.140068e-01  8.140005e-01  8.142502e-01
(Intercept)               .             .             .           
Number_Trips              6.646422e-03  6.990189e-03  7.317933e-03
Rent_per_Month            7.166391e-06  7.612437e-06  8.052100e-06
Monthly_Remittances       9.419185e-06  9.975340e-06  1.050976e-05
Wage_Last_Head            2.392673e-06  2.558210e-06  2.727092e-06
Duration_of_stay         -4.756470e-05 -5.192784e-05 -5.642373e-05
Education_Level_3        -1.977519e-02 -2.126074e-02 -2.279805e-02
Education_Level_5        -1.279801e-02 -1.513451e-02 -1.773498e-02
Education_Level_6        -6.726918e-02 -7.311271e-02 -7.914327e-02
Education_Level_7        -4.303655e-02 -4.796006e-02 -5.320673e-02
Education_Level_8        -5.172179e-02 -5.785524e-02 -6.438506e-02
Livelihood_Occupation_7   4.769939e-02  5.062646e-02  5.337941e-02
Livelihood_Occupation_9   5.438999e-02  5.774787e-02  6.095682e-02
Livelihood_Occupation_17 -2.786426e-01 -3.001908e-01 -3.219018e-01
                                                                  
(Intercept)               8.147571e-01  8.155142e-01  8.165134e-01
(Intercept)               .             .             .           
Number_Trips              7.627359e-03  7.916738e-03  8.184604e-03
Rent_per_Month            8.481500e-06  8.896816e-06  9.291077e-06
Monthly_Remittances       1.101673e-05  1.149136e-05  1.192815e-05
Wage_Last_Head            2.899007e-06  3.073633e-06  3.251172e-06
Duration_of_stay         -6.101408e-05 -6.565776e-05 -7.031273e-05
Education_Level_3        -2.438376e-02 -2.601324e-02 -2.768723e-02
Education_Level_5        -2.059472e-02 -2.370169e-02 -2.704191e-02
Education_Level_6        -8.532176e-02 -9.160539e-02 -9.795535e-02
Education_Level_7        -5.874872e-02 -6.454964e-02 -7.057008e-02
Education_Level_8        -7.127287e-02 -7.847031e-02 -8.592235e-02
Livelihood_Occupation_7   5.591353e-02  5.819174e-02  6.018674e-02
Livelihood_Occupation_9   6.397531e-02  6.676847e-02  6.931066e-02
Livelihood_Occupation_17 -3.435776e-01 -3.650212e-01 -3.860440e-01
                                                                  
(Intercept)               8.177230e-01  8.191208e-01  8.206759e-01
(Intercept)               .             .             .           
Number_Trips              8.430714e-03  8.654767e-03  8.857046e-03
Rent_per_Month            9.666344e-06  1.001732e-05  1.034164e-05
Monthly_Remittances       1.232707e-05  1.268541e-05  1.300274e-05
Wage_Last_Head            3.430238e-06  3.610656e-06  3.791710e-06
Duration_of_stay         -7.493601e-05 -7.948719e-05 -8.392849e-05
Education_Level_3        -2.938629e-02 -3.110601e-02 -3.283533e-02
Education_Level_5        -3.057946e-02 -3.428509e-02 -3.812014e-02
Education_Level_6        -1.043137e-01 -1.106375e-01 -1.168800e-01
Education_Level_7        -7.675141e-02 -8.304108e-02 -8.937992e-02
Education_Level_8        -9.356118e-02 -1.013185e-01 -1.091214e-01
Livelihood_Occupation_7   6.188149e-02  6.327064e-02  6.435941e-02
Livelihood_Occupation_9   7.158294e-02  7.357805e-02  7.529710e-02
Livelihood_Occupation_17 -4.064710e-01 -4.261479e-01 -4.449437e-01
                                                                  
(Intercept)               8.223548e-01  8.241230e-01  8.259465e-01
(Intercept)               .             .             .           
Number_Trips              9.038195e-03  9.199151e-03  9.341077e-03
Rent_per_Month            1.063757e-05  1.090413e-05  1.114109e-05
Monthly_Remittances       1.327971e-05  1.351792e-05  1.371968e-05
Wage_Last_Head            3.972539e-06  4.152149e-06  4.329450e-06
Duration_of_stay         -8.822615e-05 -9.235121e-05 -9.627995e-05
Education_Level_3        -3.456198e-02 -3.627289e-02 -3.795476e-02
Education_Level_5        -4.204200e-02 -4.600590e-02 -4.996673e-02
Education_Level_6        -1.229962e-01 -1.289439e-01 -1.346852e-01
Education_Level_7        -9.570756e-02 -1.019646e-01 -1.080945e-01
Education_Level_8        -1.168965e-01 -1.245720e-01 -1.320801e-01
Livelihood_Occupation_7   6.516252e-02  6.570248e-02  6.600751e-02
Livelihood_Occupation_9   7.674924e-02  7.795034e-02  7.892124e-02
Livelihood_Occupation_17 -4.627539e-01 -4.795012e-01 -4.951352e-01
                                                                  
(Intercept)               8.277929e-01  8.296329e-01  8.314403e-01
(Intercept)               .             .             .           
Number_Trips              9.465300e-03  9.573253e-03  9.666428e-03
Rent_per_Month            1.134892e-05  1.152876e-05  1.168228e-05
Monthly_Remittances       1.388788e-05  1.402580e-05  1.413691e-05
Wage_Last_Head            4.503292e-06  4.672526e-06  4.836048e-06
Duration_of_stay         -9.999412e-05 -1.034809e-04 -1.067325e-04
Education_Level_3        -3.959463e-02 -4.118041e-02 -4.270138e-02
Education_Level_5        -5.388090e-02 -5.770794e-02 -6.141184e-02
Education_Level_6        -1.401869e-01 -1.454212e-01 -1.503666e-01
Education_Level_7        -1.140456e-01 -1.197726e-01 -1.252375e-01
Education_Level_8        -1.393594e-01 -1.463560e-01 -1.530252e-01
Livelihood_Occupation_7   6.610936e-02  6.604130e-02  6.583633e-02
Livelihood_Occupation_9   7.968613e-02  8.027089e-02  8.070166e-02
Livelihood_Occupation_17 -5.096308e-01 -5.229859e-01 -5.352177e-01
                                                                  
(Intercept)               8.331932e-01  8.348868e-01  8.364680e-01
(Intercept)               .             .             .           
Number_Trips              9.746331e-03  9.812849e-03  9.872174e-03
Rent_per_Month            1.181156e-05  1.191210e-05  1.200698e-05
Monthly_Remittances       1.422470e-05  1.428933e-05  1.434373e-05
Wage_Last_Head            4.992856e-06  5.143496e-06  5.283061e-06
Duration_of_stay         -1.097460e-04 -1.125256e-04 -1.150668e-04
Education_Level_3        -4.414851e-02 -4.552413e-02 -4.679496e-02
Education_Level_5        -6.496208e-02 -6.834279e-02 -7.150966e-02
Education_Level_6        -1.550075e-01 -1.593480e-01 -1.633436e-01
Education_Level_7        -1.304101e-01 -1.352785e-01 -1.397992e-01
Education_Level_8        -1.593320e-01 -1.652576e-01 -1.707667e-01
Livelihood_Occupation_7   6.552569e-02  6.513840e-02  6.469806e-02
Livelihood_Occupation_9   8.100372e-02  8.120278e-02  8.131352e-02
Livelihood_Occupation_17 -5.463602e-01 -5.564607e-01 -5.655729e-01
                                                                  
(Intercept)               8.379664e-01  8.393629e-01  8.406545e-01
(Intercept)               .             .             .           
Number_Trips              9.920876e-03  9.961779e-03  9.996009e-03
Rent_per_Month            1.207815e-05  1.213488e-05  1.217946e-05
Monthly_Remittances       1.438112e-05  1.440736e-05  1.442472e-05
Wage_Last_Head            5.415258e-06  5.538377e-06  5.652287e-06
Duration_of_stay         -1.173864e-04 -1.194912e-04 -1.213928e-04
Education_Level_3        -4.798618e-02 -4.908727e-02 -5.009876e-02
Education_Level_5        -7.447644e-02 -7.722786e-02 -7.976233e-02
Education_Level_6        -1.670366e-01 -1.704197e-01 -1.735029e-01
Education_Level_7        -1.439954e-01 -1.478574e-01 -1.513911e-01
Education_Level_8        -1.758721e-01 -1.805685e-01 -1.848638e-01
Livelihood_Occupation_7   6.422752e-02  6.374388e-02  6.326118e-02
Livelihood_Occupation_9   8.136114e-02  8.135938e-02  8.132157e-02
Livelihood_Occupation_17 -5.737619e-01 -5.810932e-01 -5.876346e-01
                                                                  
(Intercept)               8.418407e-01  8.429234e-01  8.439063e-01
(Intercept)               .             .             .           
Number_Trips              1.002457e-02  1.004835e-02  1.006811e-02
Rent_per_Month            1.221396e-05  1.224023e-05  1.225987e-05
Monthly_Remittances       1.443518e-05  1.444037e-05  1.444164e-05
Wage_Last_Head            5.757027e-06  5.852786e-06  5.939869e-06
Duration_of_stay         -1.231040e-04 -1.246383e-04 -1.260094e-04
Education_Level_3        -5.102265e-02 -5.186210e-02 -5.262118e-02
Education_Level_5        -8.208250e-02 -8.419453e-02 -8.610728e-02
Education_Level_6        -1.762993e-01 -1.788244e-01 -1.810954e-01
Education_Level_7        -1.546072e-01 -1.575197e-01 -1.601456e-01
Education_Level_8        -1.887713e-01 -1.923089e-01 -1.954974e-01
Livelihood_Occupation_7   6.279014e-02  6.233865e-02  6.191214e-02
Livelihood_Occupation_9   8.125859e-02  8.117912e-02  8.108996e-02
Livelihood_Occupation_17 -5.934539e-01 -5.986167e-01 -6.031862e-01
                                                                  
(Intercept)               8.447940e-01  8.455922e-01  8.463070e-01
(Intercept)               .             .             .           
Number_Trips              1.008453e-02  1.009816e-02  1.010948e-02
Rent_per_Month            1.227424e-05  1.228447e-05  1.229152e-05
Monthly_Remittances       1.444008e-05  1.443656e-05  1.443175e-05
Wage_Last_Head            6.018678e-06  6.089686e-06  6.153407e-06
Duration_of_stay         -1.272309e-04 -1.283162e-04 -1.292782e-04
Education_Level_3        -5.330458e-02 -5.391744e-02 -5.446508e-02
Education_Level_5        -8.783154e-02 -8.937944e-02 -9.076386e-02
Education_Level_6        -1.831303e-01 -1.849474e-01 -1.865653e-01
Education_Level_7        -1.625034e-01 -1.646129e-01 -1.664939e-01
Education_Level_8        -1.983597e-01 -2.009199e-01 -2.032025e-01
Livelihood_Occupation_7   6.151403e-02  6.114613e-02  6.080902e-02
Livelihood_Occupation_9   8.099628e-02  8.090190e-02  8.080960e-02
Livelihood_Occupation_17 -6.072221e-01 -6.107799e-01 -6.139111e-01
                                                                  
(Intercept)               8.469450e-01  8.474946e-01  8.479995e-01
(Intercept)               .             .             .           
Number_Trips              1.011889e-02  1.012917e-02  1.013551e-02
Rent_per_Month            1.229614e-05  1.230770e-05  1.230853e-05
Monthly_Remittances       1.442618e-05  1.442437e-05  1.441802e-05
Wage_Last_Head            6.210384e-06  6.258729e-06  6.304041e-06
Duration_of_stay         -1.301289e-04 -1.308727e-04 -1.315348e-04
Education_Level_3        -5.495289e-02 -5.536777e-02 -5.575329e-02
Education_Level_5        -9.199794e-02 -9.307368e-02 -9.404772e-02
Education_Level_6        -1.880019e-01 -1.892479e-01 -1.903746e-01
Education_Level_7        -1.681663e-01 -1.696268e-01 -1.709406e-01
Education_Level_8        -2.052315e-01 -2.070117e-01 -2.086046e-01
Livelihood_Occupation_7   6.050232e-02  6.022514e-02  5.997570e-02
Livelihood_Occupation_9   8.072126e-02  8.063543e-02  8.055836e-02
Livelihood_Occupation_17 -6.166628e-01 -6.190791e-01 -6.211961e-01
                                                                  
(Intercept)               8.484469e-01  8.488420e-01  8.491678e-01
(Intercept)               .             .             .           
Number_Trips              1.014074e-02  1.014510e-02  1.015126e-02
Rent_per_Month            1.230818e-05  1.230722e-05  1.231576e-05
Monthly_Remittances       1.441167e-05  1.440558e-05  1.440486e-05
Wage_Last_Head            6.344268e-06  6.379853e-06  6.407789e-06
Duration_of_stay         -1.321174e-04 -1.326293e-04 -1.330681e-04
Education_Level_3        -5.609438e-02 -5.639538e-02 -5.663052e-02
Education_Level_5        -9.490950e-02 -9.567017e-02 -9.630061e-02
Education_Level_6        -1.913691e-01 -1.922450e-01 -1.929730e-01
Education_Level_7        -1.721010e-01 -1.731237e-01 -1.739829e-01
Education_Level_8        -2.100110e-01 -2.112505e-01 -2.123048e-01
Livelihood_Occupation_7   5.975233e-02  5.955309e-02  5.937887e-02
Livelihood_Occupation_9   8.048758e-02  8.042305e-02  8.036280e-02
Livelihood_Occupation_17 -6.230501e-01 -6.246724e-01 -6.260946e-01
                                                                  
(Intercept)               8.494747e-01  8.497467e-01  8.499855e-01
(Intercept)               .             .             .           
Number_Trips              1.015432e-02  1.015660e-02  1.015850e-02
Rent_per_Month            1.231424e-05  1.231146e-05  1.230870e-05
Monthly_Remittances       1.439953e-05  1.439409e-05  1.438912e-05
Wage_Last_Head            6.435597e-06  6.460320e-06  6.482061e-06
Duration_of_stay         -1.334621e-04 -1.338079e-04 -1.341105e-04
Education_Level_3        -5.686580e-02 -5.707381e-02 -5.725639e-02
Education_Level_5        -9.689314e-02 -9.741516e-02 -9.787327e-02
Education_Level_6        -1.936517e-01 -1.942494e-01 -1.947734e-01
Education_Level_7        -1.747762e-01 -1.754743e-01 -1.760863e-01
Education_Level_8        -2.132653e-01 -2.141097e-01 -2.148499e-01
Livelihood_Occupation_7   5.922150e-02  5.908238e-02  5.895964e-02
Livelihood_Occupation_9   8.031008e-02  8.026322e-02  8.022144e-02
Livelihood_Occupation_17 -6.273334e-01 -6.284153e-01 -6.293595e-01
                                                                  
(Intercept)               8.501948e-01  8.503782e-01  8.505387e-01
(Intercept)               .             .             .           
Number_Trips              1.016008e-02  1.016141e-02  1.016253e-02
Rent_per_Month            1.230609e-05  1.230366e-05  1.230141e-05
Monthly_Remittances       1.438462e-05  1.438058e-05  1.437697e-05
Wage_Last_Head            6.501142e-06  6.517870e-06  6.532520e-06
Duration_of_stay         -1.343751e-04 -1.346063e-04 -1.348082e-04
Education_Level_3        -5.741641e-02 -5.755654e-02 -5.767913e-02
Education_Level_5        -9.827481e-02 -9.862640e-02 -9.893400e-02
Education_Level_6        -1.952321e-01 -1.956335e-01 -1.959843e-01
Education_Level_7        -1.766224e-01 -1.770914e-01 -1.775015e-01
Education_Level_8        -2.154981e-01 -2.160652e-01 -2.165611e-01
Livelihood_Occupation_7   5.885153e-02  5.875648e-02  5.867303e-02
Livelihood_Occupation_9   8.018430e-02  8.015140e-02  8.012232e-02
Livelihood_Occupation_17 -6.301833e-01 -6.309017e-01 -6.315279e-01
                                                                  
(Intercept)               8.506791e-01  8.508017e-01  8.508936e-01
(Intercept)               .             .             .           
Number_Trips              1.016348e-02  1.016428e-02  1.016655e-02
Rent_per_Month            1.229936e-05  1.229751e-05  1.230068e-05
Monthly_Remittances       1.437375e-05  1.437090e-05  1.437112e-05
Wage_Last_Head            6.545338e-06  6.556546e-06  6.564103e-06
Duration_of_stay         -1.349844e-04 -1.351382e-04 -1.352652e-04
Education_Level_3        -5.778630e-02 -5.787994e-02 -5.794314e-02
Education_Level_5        -9.920291e-02 -9.943784e-02 -9.961023e-02
Education_Level_6        -1.962908e-01 -1.965584e-01 -1.967618e-01
Education_Level_7        -1.778598e-01 -1.781728e-01 -1.784133e-01
Education_Level_8        -2.169943e-01 -2.173726e-01 -2.176706e-01
Livelihood_Occupation_7   5.859985e-02  5.853575e-02  5.848729e-02
Livelihood_Occupation_9   8.009667e-02  8.007410e-02  8.005633e-02
Livelihood_Occupation_17 -6.320737e-01 -6.325492e-01 -6.329651e-01

# Call lambda values and corresponding test MSEs
lambda_values <- cv.out$lambda
test_mses <- cv.out$cvm

# Plot the test MSE as a function of the log of the regularization 
# parameter (i.e. log($\lambda$)) for several orders of magnitude.

plot(log(lambda_values), test_mses, type = "b", 
     xlab = "log(lambda)", ylab = "Test MSE",
     main = "Test MSE vs. log(lambda) of Ridge Regression")

Lasso



# Train Lasso regression model
lasso.mod <- glmnet(x[train, ], y[train], alpha = 1, lambda.min.ratio = 0.000001)

# Cross-validation for selecting lambda
cv.out <- cv.glmnet(x[train, ], y[train], alpha = 1, lambda.min.ratio = 0.000001)

# Find lambda that minimizes training MSE
bestlam <- cv.out$lambda.min

# Predict the test data with the best lambda
lasso.pred <- predict(lasso.mod, s = bestlam, newx = x[test, ])

# Assuming predictions are probabilities, convert them to binary classes
predicted_classes <- ifelse(lasso.pred > 0.5, 1, 0)

# Confusion matrix
conf_matrix <- table(predicted_classes, y[test])

# Calculate Sensitivity (True Positive Rate)
sensitivity <- conf_matrix[2, 2] / sum(conf_matrix[2, ])

# Calculate Specificity (True Negative Rate)
specificity <- conf_matrix[1, 1] / sum(conf_matrix[1, ])

# Calculate Accuracy
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)

# Calculate Precision (Positive Predictive Value)
precision <- conf_matrix[2, 2] / sum(conf_matrix[, 2])

# Calculate Recall (Same as Sensitivity)
recall <- sensitivity

# Calculate False Positive Rate
fpr <- 1 - specificity

F1 <- 2 * (precision * recall) / (precision + recall)


# Create ROC Curve

roc_curve_lasso <- roc(y[test], lasso.pred)

# Calculate AUC (Area Under Curve)
auc_value_lasso <- auc(roc_curve_lasso)

roc_curve <- roc(y[test], lasso.pred)

# Calculate AUC (Area Under Curve)
auc_value <- auc(roc_curve)


# Print the metrics
print("Metrics:")
print(paste("Sensitivity (True Positive Rate):", sensitivity))
print(paste("Specificity (True Negative Rate):", specificity))
print(paste("Accuracy:", accuracy))
print(paste("Precision (Positive Predictive Value):", precision))
print(paste("Recall:", recall))
print(paste("False Positive Rate:", fpr))
print(paste("F1:", F1))
print(paste("AUC (Area Under Curve):", auc_value_lasso))

# Plot ROC curve
plot(roc_curve_lasso, main = "ROC Curve", col = "blue")

# Save AUC
auc_lasso <- round(auc(roc_curve_lasso), 2)


print(paste("AUC (Area Under Curve):", auc_value))

# Plot ROC curve
plot(roc_curve, main = "ROC Curve", col = "blue")

coef(lasso.mod)
Setting levels: control = 0, case = 1

Warning message in roc.default(y[test], lasso.pred):
"Deprecated use a matrix as predictor. Unexpected results may be produced, please pass a numeric vector."
Setting direction: controls < cases

Setting levels: control = 0, case = 1

Warning message in roc.default(y[test], lasso.pred):
"Deprecated use a matrix as predictor. Unexpected results may be produced, please pass a numeric vector."
Setting direction: controls < cases

  [[ suppressing 48 column names 's0', 's1', 's2' ... ]]
[1] "Metrics:"
[1] "Sensitivity (True Positive Rate): 0.917808219178082"
[1] "Specificity (True Negative Rate): 0.674418604651163"
[1] "Accuracy: 0.892156862745098"
[1] "Precision (Positive Predictive Value): 0.959885386819484"
[1] "Recall: 0.917808219178082"
[1] "False Positive Rate: 0.325581395348837"
[1] "F1: 0.938375350140056"
[1] "AUC (Area Under Curve): 0.885095430042251"
[1] "AUC (Area Under Curve): 0.885095430042251"
15 x 48 sparse Matrix of class "dgCMatrix"
                                                                               
(Intercept)              0.8385678  0.84459594  0.8498389  0.8543989  0.8583650
(Intercept)              .          .           .          .          .        
Number_Trips             .          .           .          .          .        
Rent_per_Month           .          .           .          .          .        
Monthly_Remittances      .          .           .          .          .        
Wage_Last_Head           .          .           .          .          .        
Duration_of_stay         .          .           .          .          .        
Education_Level_3        .          .           .          .          .        
Education_Level_5        .          .           .          .          .        
Education_Level_6        .          .           .          .          .        
Education_Level_7        .          .           .          .          .        
Education_Level_8        .          .           .          .          .        
Livelihood_Occupation_7  .          .           .          .          .        
Livelihood_Occupation_9  .          .           .          .          .        
Livelihood_Occupation_17 .         -0.08968912 -0.1676961 -0.2355427 -0.2945521
                                                                             
(Intercept)               0.8618145  8.621181e-01  8.581691e-01  8.547354e-01
(Intercept)               .          .             .             .           
Number_Trips              .          .             .             .           
Rent_per_Month            .          .             .             .           
Monthly_Remittances       .          1.074111e-06  3.686501e-06  5.958208e-06
Wage_Last_Head            .          .             .             .           
Duration_of_stay          .          .             .             .           
Education_Level_3         .          .             .             .           
Education_Level_5         .          .             .             .           
Education_Level_6         .          .             .             .           
Education_Level_7         .          .             .             .           
Education_Level_8         .          .             .             .           
Livelihood_Occupation_7   .          .             .             .           
Livelihood_Occupation_9   .          .             .             .           
Livelihood_Occupation_17 -0.3458755 -3.884599e-01 -4.222882e-01 -4.517112e-01
                                                                  
(Intercept)               8.517490e-01  8.489275e-01  8.461071e-01
(Intercept)               .             .             .           
Number_Trips              .             .             .           
Rent_per_Month            .             .             .           
Monthly_Remittances       7.934023e-06  9.607007e-06  1.057038e-05
Wage_Last_Head            .             4.548321e-08  6.156493e-07
Duration_of_stay          .             .             .           
Education_Level_3         .             .             .           
Education_Level_5         .             .             .           
Education_Level_6         .             .             .           
Education_Level_7         .             .             .           
Education_Level_8         .             .            -1.184768e-02
Livelihood_Occupation_7   .             .             .           
Livelihood_Occupation_9   .             .             .           
Livelihood_Occupation_17 -4.773018e-01 -4.993742e-01 -5.181312e-01
                                                                  
(Intercept)               8.467157e-01  8.481282e-01  8.496150e-01
(Intercept)               .             .             .           
Number_Trips              .             .             .           
Rent_per_Month            .             .             1.170596e-06
Monthly_Remittances       1.121465e-05  1.175257e-05  1.223143e-05
Wage_Last_Head            1.347024e-06  2.009614e-06  2.558577e-06
Duration_of_stay         -1.267819e-05 -2.934298e-05 -4.437984e-05
Education_Level_3         .             .             .           
Education_Level_5         .             .             .           
Education_Level_6         .             .             .           
Education_Level_7         .             .            -1.421143e-02
Education_Level_8        -3.270034e-02 -5.098026e-02 -6.899918e-02
Livelihood_Occupation_7   .             .             .           
Livelihood_Occupation_9   .             .             4.382115e-04
Livelihood_Occupation_17 -5.356182e-01 -5.510074e-01 -5.648320e-01
                                                                  
(Intercept)               8.495092e-01  8.477561e-01  8.461447e-01
(Intercept)               .             .             .           
Number_Trips              4.901507e-04  1.508021e-03  2.416258e-03
Rent_per_Month            2.639183e-06  3.838212e-06  4.920779e-06
Monthly_Remittances       1.255319e-05  1.277602e-05  1.298753e-05
Wage_Last_Head            3.014969e-06  3.422343e-06  3.766555e-06
Duration_of_stay         -5.730624e-05 -6.745751e-05 -7.624610e-05
Education_Level_3         .             .             .           
Education_Level_5         .             .             .           
Education_Level_6        -2.572668e-03 -2.507511e-02 -4.457094e-02
Education_Level_7        -3.245827e-02 -4.833310e-02 -6.208087e-02
Education_Level_8        -8.497298e-02 -9.879766e-02 -1.107763e-01
Livelihood_Occupation_7   .             8.648452e-03  1.620796e-02
Livelihood_Occupation_9   1.001095e-02  1.952200e-02  2.778820e-02
Livelihood_Occupation_17 -5.762271e-01 -5.849279e-01 -5.924845e-01
                                                                  
(Intercept)               8.447970e-01  8.444533e-01  8.443245e-01
(Intercept)               .             .             .           
Number_Trips              3.191901e-03  4.094501e-03  4.925224e-03
Rent_per_Month            5.834782e-06  6.644101e-06  7.350972e-06
Monthly_Remittances       1.315921e-05  1.329222e-05  1.340360e-05
Wage_Last_Head            4.072411e-06  4.392479e-06  4.683174e-06
Duration_of_stay         -8.391429e-05 -9.070730e-05 -9.664481e-05
Education_Level_3         .             .            -1.456080e-05
Education_Level_5         .            -9.986517e-03 -2.067954e-02
Education_Level_6        -6.157157e-02 -7.786357e-02 -9.236055e-02
Education_Level_7        -7.406711e-02 -8.636265e-02 -9.746447e-02
Education_Level_8        -1.212150e-01 -1.324795e-01 -1.427445e-01
Livelihood_Occupation_7   2.278798e-02  2.740148e-02  3.118295e-02
Livelihood_Occupation_9   3.498759e-02  4.058860e-02  4.532205e-02
Livelihood_Occupation_17 -5.990617e-01 -6.047094e-01 -6.096050e-01
                                                                  
(Intercept)               8.452709e-01  8.460998e-01  8.468207e-01
(Intercept)               .             .             .           
Number_Trips              5.609173e-03  6.203110e-03  6.719684e-03
Rent_per_Month            7.996488e-06  8.554852e-06  9.040484e-06
Monthly_Remittances       1.352848e-05  1.363554e-05  1.372865e-05
Wage_Last_Head            4.936385e-06  5.157347e-06  5.349529e-06
Duration_of_stay         -1.017914e-04 -1.062700e-04 -1.101653e-04
Education_Level_3        -7.633529e-03 -1.426094e-02 -2.002512e-02
Education_Level_5        -3.114332e-02 -4.024639e-02 -4.816379e-02
Education_Level_6        -1.061635e-01 -1.181735e-01 -1.286192e-01
Education_Level_7        -1.082488e-01 -1.176326e-01 -1.257941e-01
Education_Level_8        -1.527949e-01 -1.615399e-01 -1.691460e-01
Livelihood_Occupation_7   3.468850e-02  3.773744e-02  4.038924e-02
Livelihood_Occupation_9   4.982723e-02  5.374642e-02  5.715514e-02
Livelihood_Occupation_17 -6.130089e-01 -6.159702e-01 -6.185458e-01
                                                                  
(Intercept)               8.474296e-01  8.479772e-01  8.484535e-01
(Intercept)               .             .             .           
Number_Trips              7.172152e-03  7.562525e-03  7.902034e-03
Rent_per_Month            9.471785e-06  9.838042e-06  1.015654e-05
Monthly_Remittances       1.381404e-05  1.388393e-05  1.394469e-05
Wage_Last_Head            5.514357e-06  5.660026e-06  5.786734e-06
Duration_of_stay         -1.135459e-04 -1.164935e-04 -1.190571e-04
Education_Level_3        -2.502897e-02 -2.939059e-02 -3.318412e-02
Education_Level_5        -5.503740e-02 -6.102823e-02 -6.623876e-02
Education_Level_6        -1.376840e-01 -1.455883e-01 -1.524632e-01
Education_Level_7        -1.328756e-01 -1.390517e-01 -1.444234e-01
Education_Level_8        -1.757473e-01 -1.815027e-01 -1.865086e-01
Livelihood_Occupation_7   4.269522e-02  4.470127e-02  4.644603e-02
Livelihood_Occupation_9   6.011696e-02  6.269589e-02  6.493893e-02
Livelihood_Occupation_17 -6.207854e-01 -6.227338e-01 -6.244285e-01
                                                                  
(Intercept)               8.488679e-01  8.492282e-01  8.495219e-01
(Intercept)               .             .             .           
Number_Trips              8.197322e-03  8.454148e-03  8.680857e-03
Rent_per_Month            1.043355e-05  1.067448e-05  1.089327e-05
Monthly_Remittances       1.399754e-05  1.404350e-05  1.408814e-05
Wage_Last_Head            5.896938e-06  5.992788e-06  6.073496e-06
Duration_of_stay         -1.212869e-04 -1.232262e-04 -1.249046e-04
Education_Level_3        -3.648354e-02 -3.935321e-02 -4.183611e-02
Education_Level_5        -7.077062e-02 -7.471220e-02 -7.812202e-02
Education_Level_6        -1.584427e-01 -1.636433e-01 -1.681410e-01
Education_Level_7        -1.490954e-01 -1.531589e-01 -1.566703e-01
Education_Level_8        -1.908624e-01 -1.946492e-01 -1.979233e-01
Livelihood_Occupation_7   4.796353e-02  4.928338e-02  5.043198e-02
Livelihood_Occupation_9   6.688982e-02  6.858660e-02  7.005996e-02
Livelihood_Occupation_17 -6.259024e-01 -6.271843e-01 -6.282995e-01
                                                                  
(Intercept)               8.497967e-01  8.500361e-01  8.502443e-01
(Intercept)               .             .             .           
Number_Trips              8.874761e-03  9.043351e-03  9.189982e-03
Rent_per_Month            1.107451e-05  1.123195e-05  1.136889e-05
Monthly_Remittances       1.412240e-05  1.415210e-05  1.417793e-05
Wage_Last_Head            6.146308e-06  6.209676e-06  6.264792e-06
Duration_of_stay         -1.263726e-04 -1.276495e-04 -1.287601e-04
Education_Level_3        -4.400850e-02 -4.589803e-02 -4.754145e-02
Education_Level_5        -8.110596e-02 -8.370135e-02 -8.595869e-02
Education_Level_6        -1.720781e-01 -1.755027e-01 -1.784812e-01
Education_Level_7        -1.597469e-01 -1.624230e-01 -1.647506e-01
Education_Level_8        -2.007902e-01 -2.032838e-01 -2.054527e-01
Livelihood_Occupation_7   5.143028e-02  5.229858e-02  5.305378e-02
Livelihood_Occupation_9   7.134375e-02  7.246040e-02  7.343161e-02
Livelihood_Occupation_17 -6.292691e-01 -6.301125e-01 -6.308461e-01
                                                                  
(Intercept)               8.504036e-01  8.505627e-01  8.507022e-01
(Intercept)               .             .             .           
Number_Trips              9.320567e-03  9.431298e-03  9.527406e-03
Rent_per_Month            1.149749e-05  1.160047e-05  1.168943e-05
Monthly_Remittances       1.420556e-05  1.422473e-05  1.424112e-05
Wage_Last_Head            6.309466e-06  6.351432e-06  6.388078e-06
Duration_of_stay         -1.297162e-04 -1.305572e-04 -1.312890e-04
Education_Level_3        -4.895078e-02 -5.019604e-02 -5.127963e-02
Education_Level_5        -8.789095e-02 -8.960193e-02 -9.109071e-02
Education_Level_6        -1.810358e-01 -1.832924e-01 -1.852562e-01
Education_Level_7        -1.667398e-01 -1.685041e-01 -1.700396e-01
Education_Level_8        -2.073084e-01 -2.089523e-01 -2.103828e-01
Livelihood_Occupation_7   5.371358e-02  5.428439e-02  5.478093e-02
Livelihood_Occupation_9   7.427502e-02  7.500963e-02  7.564879e-02
Livelihood_Occupation_17 -6.314857e-01 -6.320403e-01 -6.325228e-01
                                                                  
(Intercept)               8.508237e-01  8.509293e-01  8.510211e-01
(Intercept)               .             .             .           
Number_Trips              9.610988e-03  9.683683e-03  9.746909e-03
Rent_per_Month            1.176678e-05  1.183406e-05  1.189257e-05
Monthly_Remittances       1.425535e-05  1.426774e-05  1.427851e-05
Wage_Last_Head            6.419956e-06  6.447682e-06  6.471797e-06
Duration_of_stay         -1.319256e-04 -1.324792e-04 -1.329607e-04
Education_Level_3        -5.222209e-02 -5.304180e-02 -5.375474e-02
Education_Level_5        -9.238558e-02 -9.351179e-02 -9.449131e-02
Education_Level_6        -1.869643e-01 -1.884500e-01 -1.897421e-01
Education_Level_7        -1.713750e-01 -1.725366e-01 -1.735468e-01
Education_Level_8        -2.116270e-01 -2.127092e-01 -2.136503e-01
Livelihood_Occupation_7   5.521281e-02  5.558843e-02  5.591513e-02
Livelihood_Occupation_9   7.620470e-02  7.668821e-02  7.710873e-02
Livelihood_Occupation_17 -6.329424e-01 -6.333073e-01 -6.336248e-01
                                                                  
(Intercept)               8.510868e-01  8.511556e-01  8.512177e-01
(Intercept)               .             .             .           
Number_Trips              9.802568e-03  9.850705e-03  9.892237e-03
Rent_per_Month            1.194909e-05  1.199382e-05  1.203168e-05
Monthly_Remittances       1.429164e-05  1.429991e-05  1.430657e-05
Wage_Last_Head            6.489905e-06  6.508167e-06  6.524365e-06
Duration_of_stay         -1.333723e-04 -1.337365e-04 -1.340541e-04
Education_Level_3        -5.435032e-02 -5.489100e-02 -5.536294e-02
Education_Level_5        -9.530354e-02 -9.604709e-02 -9.669623e-02
Education_Level_6        -1.908274e-01 -1.918064e-01 -1.926611e-01
Education_Level_7        -1.743850e-01 -1.751513e-01 -1.758208e-01
Education_Level_8        -2.144332e-01 -2.151472e-01 -2.157706e-01
Livelihood_Occupation_7   5.620293e-02  5.644963e-02  5.666412e-02
Livelihood_Occupation_9   7.747426e-02  7.779204e-02  7.806872e-02
Livelihood_Occupation_17 -6.339050e-01 -6.341447e-01 -6.343530e-01
                                                                  
(Intercept)               8.512719e-01  8.513216e-01  8.513611e-01
(Intercept)               .             .             .           
Number_Trips              9.928307e-03  9.955940e-03  9.983034e-03
Rent_per_Month            1.206447e-05  1.209342e-05  1.211928e-05
Monthly_Remittances       1.431228e-05  1.431929e-05  1.432428e-05
Wage_Last_Head            6.538488e-06  6.547277e-06  6.557589e-06
Duration_of_stay         -1.343305e-04 -1.345670e-04 -1.347758e-04
Education_Level_3        -5.577353e-02 -5.607983e-02 -5.638763e-02
Education_Level_5        -9.726097e-02 -9.768335e-02 -9.810613e-02
Education_Level_6        -1.934047e-01 -1.939936e-01 -1.945516e-01
Education_Level_7        -1.764033e-01 -1.768507e-01 -1.772870e-01
Education_Level_8        -2.163131e-01 -2.167341e-01 -2.171408e-01
Livelihood_Occupation_7   5.685070e-02  5.700403e-02  5.714483e-02
Livelihood_Occupation_9   7.830943e-02  7.850898e-02  7.869021e-02
Livelihood_Occupation_17 -6.345342e-01 -6.347100e-01 -6.348476e-01
                                                                  
(Intercept)               8.513960e-01  8.514268e-01  8.514537e-01
(Intercept)               .             .             .           
Number_Trips              1.000725e-02  1.002837e-02  1.004672e-02
Rent_per_Month            1.214111e-05  1.215978e-05  1.217591e-05
Monthly_Remittances       1.432796e-05  1.433097e-05  1.433352e-05
Wage_Last_Head            6.567189e-06  6.575689e-06  6.583119e-06
Duration_of_stay         -1.349578e-04 -1.351164e-04 -1.352544e-04
Education_Level_3        -5.666297e-02 -5.690391e-02 -5.711375e-02
Education_Level_5        -9.848474e-02 -9.881610e-02 -9.910466e-02
Education_Level_6        -1.950464e-01 -1.954788e-01 -1.958553e-01
Education_Level_7        -1.776761e-01 -1.780165e-01 -1.783129e-01
Education_Level_8        -2.175027e-01 -2.178192e-01 -2.180947e-01
Livelihood_Occupation_7   5.726855e-02  5.737637e-02  5.747018e-02
Livelihood_Occupation_9   7.884956e-02  7.898848e-02  7.910938e-02
Livelihood_Occupation_17 -6.349648e-01 -6.350664e-01 -6.351547e-01


# evalusation function

evaluate_model <- function(model, actual, predicted_prob, threshold = 0.5, plot_roc = TRUE, show_summary = FALSE) {
  predicted_classes <- ifelse(predicted_prob > threshold, 1, 0)
  confusion <- confusionMatrix(as.factor(predicted_classes), as.factor(actual), positive = "1")


  roc_result <- roc(actual, predicted_prob)
  auc_value <- auc(roc_result)


  if (plot_roc) {
    plot(roc_result, main = "ROC Curve")
    abline(a = 0, b = 1, col = "red")  # Adding a reference line
    text(x = 0.8, y = 0.2, labels = paste("AUC =", round(auc_value, 2)))
  }
  
  # Optionally print model summary
  if (show_summary && !is.null(model)) {
    print(summary(model))
  }
  

  list(
    Accuracy = confusion$overall['Accuracy'],
    Precision = confusion$byClass['Precision'],
    Recall = confusion$byClass['Sensitivity'], 
    Specificity = confusion$byClass['Specificity'],
    AUC = auc_value,
    ROC = roc_result
  )
}

#set.seed(123) # for reproducibility
#train_idx <- createDataPartition(df1$Work_Earn_Money_1, p = 0.8, list = FALSE)
#train_set <- df1[train_idx, ]
#test_set <- df1[-train_idx, ]

Elastic



# Train Elastic Net regression model
elastic.mod <- glmnet(x[train, ], y[train], alpha = 0.5, lambda.min.ratio = 0.000001)

# Cross-validation for selecting lambda
cv.out <- cv.glmnet(x[train, ], y[train], alpha = 0.5, lambda.min.ratio = 0.000001)

# Find lambda that minimizes training MSE
bestlam <- cv.out$lambda.min

# Predict the test data with the best lambda
elastic.pred <- predict(elastic.mod, s = bestlam, newx = x[test, ])

# Assuming predictions are probabilities, convert them to binary classes
predicted_classes <- ifelse(elastic.pred > 0.5, 1, 0)

# Confusion matrix
conf_matrix <- table(predicted_classes, y[test])

# Calculate Sensitivity (True Positive Rate)
sensitivity <- conf_matrix[2, 2] / sum(conf_matrix[2, ])

# Calculate Specificity (True Negative Rate)
specificity <- conf_matrix[1, 1] / sum(conf_matrix[1, ])

# Calculate Accuracy
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)

# Calculate Precision (Positive Predictive Value)
precision <- conf_matrix[2, 2] / sum(conf_matrix[, 2])

# Calculate Recall (Same as Sensitivity)
recall <- sensitivity

# Calculate False Positive Rate
fpr <- 1 - specificity


F1 <- 2 * (precision * recall) / (precision + recall)

# Create ROC Curve
roc_curve_elastic <- roc(y[test], elastic.pred)

# Calculate AUC (Area Under Curve)
auc_elastic <- round(auc(roc_curve_elastic), 2)
roc_curve <- roc(y[test], elastic.pred)

# Calculate AUC (Area Under Curve)
auc_value <- auc(roc_curve)

# Print the metrics
print("Metrics:")
print(paste("Sensitivity (True Positive Rate):", sensitivity))
print(paste("Specificity (True Negative Rate):", specificity))
print(paste("Accuracy:", accuracy))
print(paste("Precision (Positive Predictive Value):", precision))
print(paste("Recall:", recall))
print(paste("False Positive Rate:", fpr))
print(paste("AUC (Area Under Curve):", auc_elastic))
print(paste("FI", F1))

# Plot ROC curve
plot(roc_curve_lasso, main = "ROC Curve", col = "blue", lwd = 2)
plot(roc_curve_elastic, main = "ROC Curve", col = "green", add=TRUE)

# Add a legend
legend("bottomright", legend = c("Curve 1", "Curve 2", "Curve 3"),
       col = c("blue", "red", "green"), lty = 1, lwd = 2)
print(paste("AUC (Area Under Curve):", auc_value))

# Plot ROC curve
plot(roc_curve, main = "ROC Curve", col = "blue")

coef(elastic.mod)
Setting levels: control = 0, case = 1

Warning message in roc.default(y[test], elastic.pred):
"Deprecated use a matrix as predictor. Unexpected results may be produced, please pass a numeric vector."
Setting direction: controls < cases

Setting levels: control = 0, case = 1

Warning message in roc.default(y[test], elastic.pred):
"Deprecated use a matrix as predictor. Unexpected results may be produced, please pass a numeric vector."
Setting direction: controls < cases

  [[ suppressing 48 column names 's0', 's1', 's2' ... ]]
[1] "Metrics:"
[1] "Sensitivity (True Positive Rate): 0.917808219178082"
[1] "Specificity (True Negative Rate): 0.674418604651163"
[1] "Accuracy: 0.892156862745098"
[1] "Precision (Positive Predictive Value): 0.959885386819484"
[1] "Recall: 0.917808219178082"
[1] "False Positive Rate: 0.325581395348837"
[1] "AUC (Area Under Curve): 0.89"
[1] "FI 0.938375350140056"
[1] "AUC (Area Under Curve): 0.88514399494925"

15 x 48 sparse Matrix of class "dgCMatrix"
                                                                               
(Intercept)              0.8385678  0.84285047  0.8468891  0.8506682  0.8541789
(Intercept)              .          .           .          .          .        
Number_Trips             .          .           .          .          .        
Rent_per_Month           .          .           .          .          .        
Monthly_Remittances      .          .           .          .          .        
Wage_Last_Head           .          .           .          .          .        
Duration_of_stay         .          .           .          .          .        
Education_Level_3        .          .           .          .          .        
Education_Level_5        .          .           .          .          .        
Education_Level_6        .          .           .          .          .        
Education_Level_7        .          .           .          .          .        
Education_Level_8        .          .           .          .          .        
Livelihood_Occupation_7  .          .           .          .          .        
Livelihood_Occupation_9  .          .           .          .          .        
Livelihood_Occupation_17 .         -0.06371919 -0.1238083 -0.1800357 -0.2322686
                                                                             
(Intercept)               0.8574181  8.566038e-01  8.537586e-01  8.512003e-01
(Intercept)               .          .             .             .           
Number_Trips              .          .             .             .           
Rent_per_Month            .          .             .             .           
Monthly_Remittances       .          1.520649e-06  3.748323e-06  5.756193e-06
Wage_Last_Head            .          .             .             .           
Duration_of_stay          .          .             .             .           
Education_Level_3         .          .             .             .           
Education_Level_5         .          .             .             .           
Education_Level_6         .          .             .             .           
Education_Level_7         .          .             .             .           
Education_Level_8         .          .             .             .           
Livelihood_Occupation_7   .          .             .             .           
Livelihood_Occupation_9   .          .             .             .           
Livelihood_Occupation_17 -0.2804641 -3.222402e-01 -3.588579e-01 -3.919535e-01
                                                                  
(Intercept)               8.489083e-01  8.453661e-01  8.429289e-01
(Intercept)               .             .             .           
Number_Trips              .             .             .           
Rent_per_Month            .             .             .           
Monthly_Remittances       7.558714e-06  8.909284e-06  9.954236e-06
Wage_Last_Head            .             2.906249e-07  7.487624e-07
Duration_of_stay          .             .             .           
Education_Level_3         .             .             .           
Education_Level_5         .             .             .           
Education_Level_6         .             .             .           
Education_Level_7         .             .             .           
Education_Level_8         .             .            -9.110655e-03
Livelihood_Occupation_7   .             .             .           
Livelihood_Occupation_9   .             .             .           
Livelihood_Occupation_17 -4.217347e-01 -4.473193e-01 -4.705898e-01
                                                                  
(Intercept)               8.435503e-01  8.450980e-01  8.460103e-01
(Intercept)               .             .             .           
Number_Trips              .             .             .           
Rent_per_Month            .             .             1.334076e-06
Monthly_Remittances       1.072599e-05  1.136842e-05  1.192010e-05
Wage_Last_Head            1.358990e-06  1.936655e-06  2.403417e-06
Duration_of_stay         -1.049434e-05 -2.594694e-05 -4.033518e-05
Education_Level_3         .             .             .           
Education_Level_5         .             .             .           
Education_Level_6         .             .             .           
Education_Level_7         .             .            -9.930477e-03
Education_Level_8        -2.758964e-02 -4.436355e-02 -6.065584e-02
Livelihood_Occupation_7   .             .             .           
Livelihood_Occupation_9   .             .             4.181405e-03
Livelihood_Occupation_17 -4.923683e-01 -5.119171e-01 -5.292203e-01
                                                                  
(Intercept)               8.447097e-01  8.433453e-01  8.422186e-01
(Intercept)               .             .             .           
Number_Trips              9.540962e-04  1.909808e-03  2.740899e-03
Rent_per_Month            2.776449e-06  3.968225e-06  4.997049e-06
Monthly_Remittances       1.229839e-05  1.260150e-05  1.285199e-05
Wage_Last_Head            2.804196e-06  3.186687e-06  3.531661e-06
Duration_of_stay         -5.212749e-05 -6.226260e-05 -7.123564e-05
Education_Level_3         .             .             .           
Education_Level_5         .             .             .           
Education_Level_6        -1.139670e-03 -2.238388e-02 -4.119230e-02
Education_Level_7        -2.689973e-02 -4.223122e-02 -5.584779e-02
Education_Level_8        -7.573697e-02 -8.945260e-02 -1.016697e-01
Livelihood_Occupation_7   4.141180e-03  1.208598e-02  1.905108e-02
Livelihood_Occupation_9   1.353155e-02  2.225329e-02  2.992357e-02
Livelihood_Occupation_17 -5.438599e-01 -5.561587e-01 -5.670003e-01
                                                                  
(Intercept)               8.412689e-01  8.409980e-01  8.411810e-01
(Intercept)               .             .             .           
Number_Trips              3.466776e-03  4.243041e-03  5.032886e-03
Rent_per_Month            5.895048e-06  6.690553e-06  7.391334e-06
Monthly_Remittances       1.306387e-05  1.323703e-05  1.337345e-05
Wage_Last_Head            3.839726e-06  4.145732e-06  4.444646e-06
Duration_of_stay         -7.915813e-05 -8.621658e-05 -9.248846e-05
Education_Level_3         .             .             .           
Education_Level_5         .            -6.516601e-03 -1.704901e-02
Education_Level_6        -5.779952e-02 -7.334890e-02 -8.777688e-02
Education_Level_7        -6.790836e-02 -7.969748e-02 -9.102223e-02
Education_Level_8        -1.125215e-01 -1.234628e-01 -1.341937e-01
Livelihood_Occupation_7   2.514689e-02  2.979341e-02  3.331620e-02
Livelihood_Occupation_9   3.665474e-02  4.215064e-02  4.664379e-02
Livelihood_Occupation_17 -5.765386e-01 -5.848634e-01 -5.921313e-01
                                                                  
(Intercept)               8.423439e-01  8.434214e-01  8.443943e-01
(Intercept)               .             .             .           
Number_Trips              5.686318e-03  6.258949e-03  6.758822e-03
Rent_per_Month            8.023419e-06  8.582501e-06  9.064470e-06
Monthly_Remittances       1.351203e-05  1.363386e-05  1.373541e-05
Wage_Last_Head            4.709414e-06  4.944135e-06  5.152138e-06
Duration_of_stay         -9.798455e-05 -1.028093e-04 -1.070414e-04
Education_Level_3        -6.980583e-03 -1.343157e-02 -1.909890e-02
Education_Level_5        -2.735034e-02 -3.651136e-02 -4.457254e-02
Education_Level_6        -1.015176e-01 -1.136663e-01 -1.243387e-01
Education_Level_7        -1.019935e-01 -1.117352e-01 -1.203088e-01
Education_Level_8        -1.446575e-01 -1.539685e-01 -1.621755e-01
Livelihood_Occupation_7   3.657910e-02  3.941246e-02  4.187202e-02
Livelihood_Occupation_9   5.092117e-02  5.466304e-02  5.792805e-02
Livelihood_Occupation_17 -5.977407e-01 -6.026065e-01 -6.068614e-01
                                                                  
(Intercept)               8.452458e-01  8.460204e-01  8.467076e-01
(Intercept)               .             .             .           
Number_Trips              7.198841e-03  7.580393e-03  7.913531e-03
Rent_per_Month            9.492315e-06  9.856275e-06  1.017277e-05
Monthly_Remittances       1.382607e-05  1.389940e-05  1.396200e-05
Wage_Last_Head            5.333676e-06  5.495776e-06  5.638434e-06
Duration_of_stay         -1.107425e-04 -1.139886e-04 -1.168277e-04
Education_Level_3        -2.406772e-02 -2.843473e-02 -3.226138e-02
Education_Level_5        -5.165323e-02 -5.788396e-02 -6.335066e-02
Education_Level_6        -1.336874e-01 -1.419013e-01 -1.490954e-01
Education_Level_7        -1.278340e-01 -1.344578e-01 -1.402683e-01
Education_Level_8        -1.693915e-01 -1.757486e-01 -1.813311e-01
Livelihood_Occupation_7   4.400508e-02  4.585645e-02  4.746316e-02
Livelihood_Occupation_9   6.077111e-02  6.325125e-02  6.541180e-02
Livelihood_Occupation_17 -6.105762e-01 -6.138190e-01 -6.166480e-01
                                                                  
(Intercept)               8.473156e-01  8.478525e-01  8.483255e-01
(Intercept)               .             .             .           
Number_Trips              8.204260e-03  8.457866e-03  8.679002e-03
Rent_per_Month            1.044800e-05  1.068733e-05  1.089544e-05
Monthly_Remittances       1.401556e-05  1.406144e-05  1.410082e-05
Wage_Last_Head            5.763791e-06  5.873804e-06  5.970242e-06
Duration_of_stay         -1.193091e-04 -1.214766e-04 -1.233688e-04
Education_Level_3        -3.561137e-02 -3.854165e-02 -4.110294e-02
Education_Level_5        -6.814173e-02 -7.233662e-02 -7.600641e-02
Education_Level_6        -1.553908e-01 -1.608955e-01 -1.657055e-01
Education_Level_7        -1.453598e-01 -1.498171e-01 -1.537160e-01
Education_Level_8        -1.862274e-01 -1.905174e-01 -1.942725e-01
Livelihood_Occupation_7   4.885780e-02  5.006860e-02  5.111999e-02
Livelihood_Occupation_9   6.729349e-02  6.893200e-02  7.035851e-02
Livelihood_Occupation_17 -6.191149e-01 -6.212654e-01 -6.231396e-01
                                                                  
(Intercept)               8.487229e-01  8.490903e-01  8.494129e-01
(Intercept)               .             .             .           
Number_Trips              8.874946e-03  9.042587e-03  9.188581e-03
Rent_per_Month            1.108526e-05  1.124170e-05  1.137756e-05
Monthly_Remittances       1.413911e-05  1.416777e-05  1.419236e-05
Wage_Last_Head            6.052211e-06  6.126370e-06  6.191246e-06
Duration_of_stay         -1.250121e-04 -1.264531e-04 -1.277096e-04
Education_Level_3        -4.332840e-02 -4.528305e-02 -4.698882e-02
Education_Level_5        -7.919798e-02 -8.200242e-02 -8.445113e-02
Education_Level_6        -1.698826e-01 -1.735516e-01 -1.767529e-01
Education_Level_7        -1.571033e-01 -1.600825e-01 -1.626837e-01
Education_Level_8        -1.975395e-01 -2.004119e-01 -2.029210e-01
Livelihood_Occupation_7   5.203348e-02  5.282657e-02  5.351559e-02
Livelihood_Occupation_9   7.159783e-02  7.267890e-02  7.361982e-02
Livelihood_Occupation_17 -6.247726e-01 -6.261949e-01 -6.274336e-01
                                                                  
(Intercept)               8.496956e-01  8.499211e-01  8.501391e-01
(Intercept)               .             .             .           
Number_Trips              9.315737e-03  9.429685e-03  9.525889e-03
Rent_per_Month            1.149568e-05  1.160810e-05  1.169678e-05
Monthly_Remittances       1.421356e-05  1.423706e-05  1.425250e-05
Wage_Last_Head            6.247933e-06  6.294207e-06  6.337676e-06
Duration_of_stay         -1.288049e-04 -1.297496e-04 -1.305818e-04
Education_Level_3        -4.847669e-02 -4.975475e-02 -5.088750e-02
Education_Level_5        -8.658810e-02 -8.842290e-02 -9.005144e-02
Education_Level_6        -1.795448e-01 -1.819439e-01 -1.840684e-01
Education_Level_7        -1.649537e-01 -1.669001e-01 -1.686298e-01
Education_Level_8        -2.051115e-01 -2.069938e-01 -2.086638e-01
Livelihood_Occupation_7   5.411426e-02  5.463697e-02  5.508867e-02
Livelihood_Occupation_9   7.443863e-02  7.514952e-02  7.576945e-02
Livelihood_Occupation_17 -6.285122e-01 -6.294528e-01 -6.302700e-01
                                                                  
(Intercept)               8.503309e-01  8.504984e-01  8.506446e-01
(Intercept)               .             .             .           
Number_Trips              9.609447e-03  9.682170e-03  9.745463e-03
Rent_per_Month            1.177330e-05  1.183981e-05  1.189763e-05
Monthly_Remittances       1.426555e-05  1.427682e-05  1.428658e-05
Wage_Last_Head            6.375738e-06  6.408939e-06  6.437883e-06
Duration_of_stay         -1.313072e-04 -1.319388e-04 -1.324888e-04
Education_Level_3        -5.187508e-02 -5.273548e-02 -5.348490e-02
Education_Level_5        -9.147163e-02 -9.270928e-02 -9.378756e-02
Education_Level_6        -1.859207e-01 -1.875343e-01 -1.889396e-01
Education_Level_7        -1.701384e-01 -1.714531e-01 -1.725985e-01
Education_Level_8        -2.101205e-01 -2.113903e-01 -2.124967e-01
Livelihood_Occupation_7   5.548135e-02  5.582267e-02  5.611937e-02
Livelihood_Occupation_9   7.630904e-02  7.677851e-02  7.718694e-02
Livelihood_Occupation_17 -6.309813e-01 -6.316005e-01 -6.321392e-01
                                                                  
(Intercept)               8.507722e-01  8.508681e-01  8.509645e-01
(Intercept)               .             .             .           
Number_Trips              9.800543e-03  9.849588e-03  9.891570e-03
Rent_per_Month            1.194791e-05  1.199788e-05  1.203634e-05
Monthly_Remittances       1.429502e-05  1.430631e-05  1.431276e-05
Wage_Last_Head            6.463111e-06  6.482141e-06  6.501312e-06
Duration_of_stay         -1.329677e-04 -1.333768e-04 -1.337395e-04
Education_Level_3        -5.413753e-02 -5.468170e-02 -5.517774e-02
Education_Level_5        -9.472680e-02 -9.550615e-02 -9.622085e-02
Education_Level_6        -1.901634e-01 -1.911905e-01 -1.921196e-01
Education_Level_7        -1.735961e-01 -1.744250e-01 -1.751836e-01
Education_Level_8        -2.134607e-01 -2.142651e-01 -2.149982e-01
Livelihood_Occupation_7   5.637731e-02  5.660501e-02  5.679953e-02
Livelihood_Occupation_9   7.754226e-02  7.785087e-02  7.811942e-02
Livelihood_Occupation_17 -6.326081e-01 -6.330200e-01 -6.333745e-01
                                                                  
(Intercept)               8.510509e-01  8.511264e-01  8.511910e-01
(Intercept)               .             .             .           
Number_Trips              9.927741e-03  9.959156e-03  9.983752e-03
Rent_per_Month            1.206869e-05  1.209669e-05  1.212253e-05
Monthly_Remittances       1.431780e-05  1.432210e-05  1.432828e-05
Wage_Last_Head            6.518327e-06  6.533178e-06  6.542486e-06
Duration_of_stay         -1.340561e-04 -1.343317e-04 -1.345662e-04
Education_Level_3        -5.561117e-02 -5.598856e-02 -5.626815e-02
Education_Level_5        -9.684542e-02 -9.738925e-02 -9.779536e-02
Education_Level_6        -1.929314e-01 -1.936382e-01 -1.941958e-01
Education_Level_7        -1.758468e-01 -1.764243e-01 -1.768678e-01
Education_Level_8        -2.156389e-01 -2.161968e-01 -2.166313e-01
Livelihood_Occupation_7   5.696861e-02  5.711567e-02  5.723680e-02
Livelihood_Occupation_9   7.835334e-02  7.855689e-02  7.872565e-02
Livelihood_Occupation_17 -6.336828e-01 -6.339511e-01 -6.342011e-01
                                                                  
(Intercept)               8.512464e-01  8.512957e-01  8.513393e-01
(Intercept)               .             .             .           
Number_Trips              1.000748e-02  1.002857e-02  1.004692e-02
Rent_per_Month            1.214496e-05  1.216355e-05  1.217935e-05
Monthly_Remittances       1.433225e-05  1.433496e-05  1.433708e-05
Wage_Last_Head            6.553335e-06  6.563446e-06  6.572408e-06
Duration_of_stay         -1.347744e-04 -1.349563e-04 -1.351149e-04
Education_Level_3        -5.655114e-02 -5.680482e-02 -5.702699e-02
Education_Level_5        -9.820309e-02 -9.856851e-02 -9.888847e-02
Education_Level_6        -1.947264e-01 -1.951977e-01 -1.956099e-01
Education_Level_7        -1.773010e-01 -1.776876e-01 -1.780260e-01
Education_Level_8        -2.170504e-01 -2.174234e-01 -2.177496e-01
Livelihood_Occupation_7   5.734755e-02  5.744495e-02  5.752984e-02
Livelihood_Occupation_9   7.887872e-02  7.901348e-02  7.913103e-02
Livelihood_Occupation_17 -6.344046e-01 -6.345794e-01 -6.347311e-01


# Calculate the test MSE
elastic_mse <- mean((elastic.pred - y[test])^2)

# Print and report test error
print("Report Test Error:")
cat("Elastic Net MSE:", elastic_mse, "\n")
print("Elastic Net Regression combines Lasso and Ridge regularization.")

# Plot of non-zero coefficient estimates
plot(elastic.mod, xvar = "lambda")

# Get final model with selected lambda
elastic.mod.final <- glmnet(x[train, ], y[train], alpha = 0.5, lambda = bestlam)

# Sparse matrix
[1] "Report Test Error:"
Elastic Net MSE: 0.08502231 
[1] "Elastic Net Regression combines Lasso and Ridge regularization."

From Line 487 in feature_select_r_sheeba.qmd

Note about Feature Selection

Feature selection was performed on logistic regression model. Then the selected features were used for Ridge, Lasso, and Elastic. Feature selection was not repeated on ensemble methods to see if ensemble methods prioritized different features in the models.

Decision Tree

set.seed(123) # for reproducibility

train_idx <- createDataPartition(df1$Work_Earn_Money_1, p = 0.8, list = FALSE)
train <- df1[train_idx,]
test <- df1[-train_idx,] 

tree_model <- rpart(Work_Earn_Money_1 ~ ., data = train, method = "class")
library(rpart.plot)

rpart.plot(tree_model, main="Decision Tree Model", extra=102)  # extra=102 to show node numbers and splits
predictions_prob <- predict(tree_model, newdata = test, type = "prob")
results <- evaluate_model(tree_model, test$Work_Earn_Money_1, predictions_prob[,2], plot_roc = TRUE, show_summary = TRUE)

print(results)
Setting levels: control = 0, case = 1

Setting direction: controls < cases

Call:
rpart(formula = Work_Earn_Money_1 ~ ., data = train, method = "class")
  n= 1600 

          CP nsplit rel error    xerror       xstd
1 0.59760956      0 1.0000000 1.0000000 0.05795743
2 0.01394422      1 0.4023904 0.4063745 0.03893340
3 0.01000000      3 0.3745020 0.4262948 0.03980962

Variable importance
            Paid_in_Taka Livelihood_Occupation_17              Food_budget 
                      72                       16                        6 
          Wage_Last_Head          Wage_First_Head         Duration_of_stay 
                       3                        3                        1 

Node number 1: 1600 observations,    complexity param=0.5976096
  predicted class=1  expected loss=0.156875  P(node) =1
    class counts:   251  1349
   probabilities: 0.157 0.843 
  left son=2 (210 obs) right son=3 (1390 obs)
  Primary splits:
      Paid_in_Taka             < 1    to the left,  improve=237.07340, (0 missing)
      Livelihood_Occupation_17 < 0.5  to the right, improve= 92.04358, (0 missing)
      Food_budget              < 1.5  to the left,  improve= 79.03357, (0 missing)
      Monthly_Remittances      < 1    to the left,  improve= 67.25052, (0 missing)
      Monthly_Savings          < 1.5  to the left,  improve= 38.83325, (0 missing)
  Surrogate splits:
      Livelihood_Occupation_17 < 0.5  to the right, agree=0.898, adj=0.224, (0 split)
      Food_budget              < 1.5  to the left,  agree=0.879, adj=0.081, (0 split)

Node number 2: 210 observations,    complexity param=0.01394422
  predicted class=0  expected loss=0.1428571  P(node) =0.13125
    class counts:   180    30
   probabilities: 0.857 0.143 
  left son=4 (186 obs) right son=5 (24 obs)
  Primary splits:
      Wage_Last_Head           < 750  to the left,  improve=8.619432, (0 missing)
      Wage_First_Head          < 450  to the left,  improve=7.569876, (0 missing)
      Education_Level_2        < 0.5  to the left,  improve=3.752896, (0 missing)
      Livelihood_Occupation_17 < 0.5  to the right, improve=3.038709, (0 missing)
      Livelihood_Occupation_2  < 0.5  to the left,  improve=2.814749, (0 missing)
  Surrogate splits:
      Wage_First_Head < 450  to the left,  agree=0.981, adj=0.833, (0 split)

Node number 3: 1390 observations
  predicted class=1  expected loss=0.05107914  P(node) =0.86875
    class counts:    71  1319
   probabilities: 0.051 0.949 

Node number 4: 186 observations
  predicted class=0  expected loss=0.09139785  P(node) =0.11625
    class counts:   169    17
   probabilities: 0.909 0.091 

Node number 5: 24 observations,    complexity param=0.01394422
  predicted class=1  expected loss=0.4583333  P(node) =0.015
    class counts:    11    13
   probabilities: 0.458 0.542 
  left son=10 (7 obs) right son=11 (17 obs)
  Primary splits:
      Duration_of_stay         < 102  to the right, improve=3.1435570, (0 missing)
      Wage_First_Head          < 6500 to the right, improve=2.0416670, (0 missing)
      Wage_Last_Head           < 9500 to the right, improve=2.0416670, (0 missing)
      Age_First_Marriage       < 20   to the left,  improve=0.3082751, (0 missing)
      Livelihood_Occupation_17 < 0.5  to the right, improve=0.2528011, (0 missing)
  Surrogate splits:
      Wage_First_Head          < 8500 to the right, agree=0.875, adj=0.571, (0 split)
      Wage_Last_Head           < 9500 to the right, agree=0.792, adj=0.286, (0 split)
      Livelihood_Occupation_10 < 0.5  to the right, agree=0.750, adj=0.143, (0 split)

Node number 10: 7 observations
  predicted class=0  expected loss=0.1428571  P(node) =0.004375
    class counts:     6     1
   probabilities: 0.857 0.143 

Node number 11: 17 observations
  predicted class=1  expected loss=0.2941176  P(node) =0.010625
    class counts:     5    12
   probabilities: 0.294 0.706 

n= 1600 

node), split, n, loss, yval, (yprob)
      * denotes terminal node

 1) root 1600 251 1 (0.15687500 0.84312500)  
   2) Paid_in_Taka< 1 210  30 0 (0.85714286 0.14285714)  
     4) Wage_Last_Head< 750 186  17 0 (0.90860215 0.09139785) *
     5) Wage_Last_Head>=750 24  11 1 (0.45833333 0.54166667)  
      10) Duration_of_stay>=102 7   1 0 (0.85714286 0.14285714) *
      11) Duration_of_stay< 102 17   5 1 (0.29411765 0.70588235) *
   3) Paid_in_Taka>=1 1390  71 1 (0.05107914 0.94892086) *
$Accuracy
Accuracy 
  0.9425 

$Precision
Precision 
0.9382022 

$Recall
Sensitivity 
  0.9970149 

$Specificity
Specificity 
  0.6615385 

$AUC
Area under the curve: 0.8354

$ROC

Call:
roc.default(response = actual, predictor = predicted_prob)

Data: predicted_prob in 65 controls (actual 0) < 335 cases (actual 1).
Area under the curve: 0.8354

Decision Tree (Hyperparameter Tuned)


control <- rpart.control(minsplit = 20, minbucket = 7, maxdepth=30)
fit <- rpart(Work_Earn_Money_1 ~ ., data=train, method="class", control=control)

Decision Tree Plot

rpart.plot(fit, main="Decision Tree Model (Hyperparameter Tuning)", extra=102)  # extra=102 to show node numbers and splits
predictions_prob_rf <- predict(fit, newdata = test, type = "prob")
results <- evaluate_model(fit, test$Work_Earn_Money_1, predictions_prob_rf[,2], plot_roc = TRUE, show_summary = TRUE)


roc_curve_decision <- roc(test$Work_Earn_Money_1, predictions_prob_rf[, 2])
# Save AUC
auc_decision <- round(results$Accuracy, 2)

# Plot ROC curve
plot(roc_curve_lasso, main = "ROC Curve", col = "blue", lwd = 2)
plot(roc_curve_elastic, main = "ROC Curve", col = "green", add=TRUE)
plot(roc_curve_decision, main = "ROC Curve", col = "red", add=TRUE)

# Add a legend
legend("bottomright", legend = c("Curve 1", "Curve 2", "Curve 3"),
       col = c("blue", "red", "green"), lty = 1, lwd = 2)
Setting levels: control = 0, case = 1

Setting direction: controls < cases

Setting levels: control = 0, case = 1

Setting direction: controls < cases

Call:
rpart(formula = Work_Earn_Money_1 ~ ., data = train, method = "class", 
    control = control)
  n= 1600 

          CP nsplit rel error    xerror       xstd
1 0.59760956      0 1.0000000 1.0000000 0.05795743
2 0.01394422      1 0.4023904 0.4023904 0.03875500
3 0.01000000      3 0.3745020 0.4143426 0.03928699

Variable importance
            Paid_in_Taka Livelihood_Occupation_17              Food_budget 
                      72                       16                        6 
          Wage_Last_Head          Wage_First_Head         Duration_of_stay 
                       3                        3                        1 

Node number 1: 1600 observations,    complexity param=0.5976096
  predicted class=1  expected loss=0.156875  P(node) =1
    class counts:   251  1349
   probabilities: 0.157 0.843 
  left son=2 (210 obs) right son=3 (1390 obs)
  Primary splits:
      Paid_in_Taka             < 1    to the left,  improve=237.07340, (0 missing)
      Livelihood_Occupation_17 < 0.5  to the right, improve= 92.04358, (0 missing)
      Food_budget              < 1.5  to the left,  improve= 79.03357, (0 missing)
      Monthly_Remittances      < 1    to the left,  improve= 67.25052, (0 missing)
      Monthly_Savings          < 1.5  to the left,  improve= 38.83325, (0 missing)
  Surrogate splits:
      Livelihood_Occupation_17 < 0.5  to the right, agree=0.898, adj=0.224, (0 split)
      Food_budget              < 1.5  to the left,  agree=0.879, adj=0.081, (0 split)

Node number 2: 210 observations,    complexity param=0.01394422
  predicted class=0  expected loss=0.1428571  P(node) =0.13125
    class counts:   180    30
   probabilities: 0.857 0.143 
  left son=4 (186 obs) right son=5 (24 obs)
  Primary splits:
      Wage_Last_Head           < 750  to the left,  improve=8.619432, (0 missing)
      Wage_First_Head          < 450  to the left,  improve=7.569876, (0 missing)
      Education_Level_2        < 0.5  to the left,  improve=3.752896, (0 missing)
      Livelihood_Occupation_17 < 0.5  to the right, improve=3.038709, (0 missing)
      Livelihood_Occupation_2  < 0.5  to the left,  improve=2.814749, (0 missing)
  Surrogate splits:
      Wage_First_Head < 450  to the left,  agree=0.981, adj=0.833, (0 split)

Node number 3: 1390 observations
  predicted class=1  expected loss=0.05107914  P(node) =0.86875
    class counts:    71  1319
   probabilities: 0.051 0.949 

Node number 4: 186 observations
  predicted class=0  expected loss=0.09139785  P(node) =0.11625
    class counts:   169    17
   probabilities: 0.909 0.091 

Node number 5: 24 observations,    complexity param=0.01394422
  predicted class=1  expected loss=0.4583333  P(node) =0.015
    class counts:    11    13
   probabilities: 0.458 0.542 
  left son=10 (7 obs) right son=11 (17 obs)
  Primary splits:
      Duration_of_stay         < 102  to the right, improve=3.1435570, (0 missing)
      Wage_First_Head          < 6500 to the right, improve=2.0416670, (0 missing)
      Wage_Last_Head           < 9500 to the right, improve=2.0416670, (0 missing)
      Age_First_Marriage       < 20   to the left,  improve=0.3082751, (0 missing)
      Livelihood_Occupation_17 < 0.5  to the right, improve=0.2528011, (0 missing)
  Surrogate splits:
      Wage_First_Head          < 8500 to the right, agree=0.875, adj=0.571, (0 split)
      Wage_Last_Head           < 9500 to the right, agree=0.792, adj=0.286, (0 split)
      Livelihood_Occupation_10 < 0.5  to the right, agree=0.750, adj=0.143, (0 split)

Node number 10: 7 observations
  predicted class=0  expected loss=0.1428571  P(node) =0.004375
    class counts:     6     1
   probabilities: 0.857 0.143 

Node number 11: 17 observations
  predicted class=1  expected loss=0.2941176  P(node) =0.010625
    class counts:     5    12
   probabilities: 0.294 0.706 

n= 1600 

node), split, n, loss, yval, (yprob)
      * denotes terminal node

 1) root 1600 251 1 (0.15687500 0.84312500)  
   2) Paid_in_Taka< 1 210  30 0 (0.85714286 0.14285714)  
     4) Wage_Last_Head< 750 186  17 0 (0.90860215 0.09139785) *
     5) Wage_Last_Head>=750 24  11 1 (0.45833333 0.54166667)  
      10) Duration_of_stay>=102 7   1 0 (0.85714286 0.14285714) *
      11) Duration_of_stay< 102 17   5 1 (0.29411765 0.70588235) *
   3) Paid_in_Taka>=1 1390  71 1 (0.05107914 0.94892086) *

Decision Tree with Hyperparameter tuning results

specificity <- results$Specificity
accuracy <- results$Accuracy
sensitivities <- results$Recall
precision <- results$Precision

print(specificity)
print(accuracy)
print(sensitivities)
print(precision)
Specificity 
  0.6615385 
Accuracy 
  0.9425 
Sensitivity 
  0.9970149 
Precision 
0.9382022 

Random Forest

train_idx <- createDataPartition(df1$Work_Earn_Money_1, p = 0.8, list = FALSE)
train <- df1[train_idx,]
test <- df1[-train_idx,] 

train$Work_Earn_Money_1 <- factor(train$Work_Earn_Money_1)
test$Work_Earn_Money_1 <- factor(test$Work_Earn_Money_1)

rf <- randomForest(Work_Earn_Money_1~., data=train, type='classification', proximity=TRUE, importance=TRUE)
print(rf)

rf_predict <- predict(rf, newdata=test, type='prob')

Call:
 randomForest(formula = Work_Earn_Money_1 ~ ., data = train, type = "classification",      proximity = TRUE, importance = TRUE) 
               Type of random forest: classification
                     Number of trees: 500
No. of variables tried at each split: 7

        OOB estimate of  error rate: 5.94%
Confusion matrix:
    0    1 class.error
0 179   73  0.28968254
1  22 1326  0.01632047
# with mtry
TN <- 173
FP <- 80
FN <- 16
TP <- 1331

# Calculate metrics
accuracy <- (TP + TN) / (TP + TN + FP + FN)
precision <- TP / (TP + FP)
recall <- TP / (TP + FN)  # Also known as sensitivity
specificity <- TN / (TN + FP)

# Calculate recall (also called sensitivity)
recall <- TP / (TP + FN)

# Print the results
cat(sprintf("Accuracy: %f\n", accuracy))
cat(sprintf("Precision: %f\n", precision))
cat(sprintf("Recall (Sensitivity): %f\n", recall))
cat(sprintf("Specificity: %f\n", specificity))
cat(sprintf("Sensitivity: %f\n", recall))
Accuracy: 0.940000
Precision: 0.943303
Recall (Sensitivity): 0.988122
Specificity: 0.683794
Sensitivity: 0.988122
# Extracting the probabilities of the positive class (assuming it's the first class)
prob_positive_class <- rf_predict[, "1"]

# Compute ROC curve
roc_curve_random <- roc(test$Work_Earn_Money_1, prob_positive_class)
# Save AUC
auc_random <- round(auc(roc_curve_random), 2)


# Plot ROC curve
plot(roc_curve_lasso, main = "ROC Curve", col = "blue", lwd = 2)
plot(roc_curve_elastic, main = "ROC Curve", col = "green", add=TRUE)
plot(roc_curve_decision, main = "ROC Curve", col = "red", add=TRUE)

plot(roc_curve_random, main = "ROC Curve", col = "purple", add=TRUE)
# Add AUC value to the plot


# Add a legend
legend("bottomright", legend = c("Curve 1", "Curve 2", "Curve 3", "Curve 4"),
       col = c("blue", "red", "green", "purple"), lty = 1, lwd = 2)
text(0.8, 0.2, paste("AUC =", round(auc(roc_curve_random), 2)), adj = 0)

roc_curve <- roc(test$Work_Earn_Money_1, prob_positive_class)

# Plot ROC curve
plot(roc_curve, main = "ROC Curve", col = "blue")
# Add AUC value to the plot
text(0.8, 0.2, paste("AUC =", round(auc(roc_curve), 2)), adj = 0)
Setting levels: control = 0, case = 1

Setting direction: controls < cases

Setting levels: control = 0, case = 1

Setting direction: controls < cases

Variable Importance

# Plot variable importance
varImpPlot(rf, main = "Variable Importance Plot", cex = 0.6, pch = 19)

XG Boost

library(xgboost)
library(caTools)
library(dplyr)
library(caret)

set.seed(42)

train_idx <- createDataPartition(df1$Work_Earn_Money_1, p = 0.8, list = FALSE)
train <- df1[train_idx,]
test <- df1[-train_idx,] 


# Split the dataset into training and testing sets
X_train <- df1[train_idx, -which(names(df1) == "Work_Earn_Money_1")]  # Exclude the target variable from the training set
y_train <- df1[train_idx, "Work_Earn_Money_1"]   # Extract the target variable for the training set

X_test <- df1[-train_idx, -which(names(df1) == "Work_Earn_Money_1")]  # Exclude the target variable from the testing set
y_test <- df1[-train_idx, "Work_Earn_Money_1"]   # Extract the target variable for the testing set


xgb_train <- xgb.DMatrix(data = as.matrix(X_train), label = y_train)
xgb_test <- xgb.DMatrix(data = as.matrix(X_test), label = y_test)
xgb_params <- list(
  booster = "gbtree",
  eta = 0.01,
  max_depth = 2,
  gamma = 4,
  subsample = 0.75,
  colsample_bytree = 1,
  objective = "binary:logistic",
  eval_metric = "mlogloss",
  num_class = length(levels(df1$D1A_1V1L))
)

xgb_model <- xgb.train(
  params = xgb_params,
  data = xgb_train,
  nrounds = 5000,
  verbose = 1
)
xgb_model
##### xgb.Booster
raw: 4.2 Mb 
call:
  xgb.train(params = xgb_params, data = xgb_train, nrounds = 5000, 
    verbose = 1)
params (as set within xgb.train):
  booster = "gbtree", eta = "0.01", max_depth = "2", gamma = "4", subsample = "0.75", colsample_bytree = "1", objective = "binary:logistic", eval_metric = "mlogloss", num_class = "0", validate_parameters = "TRUE"
xgb.attributes:
  niter
callbacks:
  cb.print.evaluation(period = print_every_n)
# of features: 58 
niter: 5000
nfeatures : 58 
# Make predictions on the test set
predictions <- predict(xgb_model, xgb_test)

# Convert probabilities to class labels, assign greater than 0.5 to Positive
pred_classes <- ifelse(predictions > 0.5, "Positive", "Negative")

# Create the confusion matrix
cm <- table(Actual = test$Work_Earn_Money_1, Predicted = pred_classes)

# Confusion Matrix
print(cm)
      Predicted
Actual Negative Positive
     0       58       16
     1        7      319
# Calculate True Positives (TP), True Negatives (TN), False Positives (FP), False Negatives (FN)
TP <- 319
TN <- 58
FP <- 16
FN <- 7

# Calculate accuracy
accuracy <- (TP + TN) / sum(cm)

# Calculate precision
precision <- TP / (TP + FP)

# Calculate recall (also called sensitivity)
recall <- TP / (TP + FN)

# Calculate F1 score
F1 <- 2 * (precision * recall) / (precision + recall)


#Specificity
# Calculate specificity
specificity <- (TN / (TN + FP))

# Print the metrics
cat("Accuracy:", accuracy, "\n")
cat("Precision:", precision, "\n")
cat("Recall:", recall, "\n")
cat("F1 Score:", F1, "\n")

cat("Specificity:", specificity)
Accuracy: 0.9425 
Precision: 0.9522388 
Recall: 0.9785276 
F1 Score: 0.9652042 
Specificity: 0.7837838
library(pROC)

pred_probs <- predict(xgb_model, xgb_test)
# Compute ROC curve

roc_curve_xg <- roc(y_test, pred_probs)

auc_xg <- round(auc(roc_curve_xg), 2)

# Add AUC
auc_lr <- paste("Logistic Regression,", auc_lr)
auc_lasso <- paste("Lasso AUC,", auc_lasso)
auc_elastic <- paste("Elastic AUC,", auc_elastic)
auc_decision <- paste("Decision Tree with Hyperparameter \nTuning,", auc_decision)
auc_random <- paste("Random Forest,", auc_random)
auc_xg <- paste("XG Boost AUC,", auc_xg)

# Including Prof Nakul's comment to adjust x-axis correctly
#ggroc(roc_curve_lr, legacy.axes = T)

# Plot ROC curve
plot(roc_curve_lr, legacy.axes = T, main = "Comparison of ROC Curves", col = "#597fd2")
plot(roc_curve_lasso, legacy.axes = T, main = "Comparison of ROC Curves", col = "#ec5f4c", lwd = 2, add=TRUE)
plot(roc_curve_elastic, legacy.axes = T, main = "Comparison of ROC Curves", col = "#ffc929", add=TRUE)
plot(roc_curve_random, legacy.axes = T, main = "Comparison of ROC Curves", col = "#006f3c", add=TRUE)
plot(roc_curve_xg, legacy.axes = T, main = "Comparison of ROC Curves", col = "#ff0090", add=TRUE)
plot(roc_curve_decision, legacy.axes = T, main = "Comparison of ROC Curves", col = "#6a4477", add=TRUE)


# Add Legend
legend("bottomright", title="AUC Values", legend = c(auc_lr, auc_lasso, auc_elastic, auc_random, auc_xg,  auc_decision), col = c("#597fd2", "#ec5f4c", "#ffc929", "#006f3c", "#ff0090", "#7d1189"), lty = 1, lwd = 2, bg = rgb(1, 1, 1, alpha = 0.7))
Setting levels: control = 0, case = 1

Setting direction: controls < cases

XG Boost Tree Plot

# plot the first tree
tree_plot <- xgb.plot.tree(model = xgb_model, trees = 3)
print(tree_plot)
library(xgboost)

# Make sure your environment can display graphics
# Assuming 'xgb_model' is already trained
xgb.plot.tree(model = xgb_model, trees = 3)
# create plot object of XGBoost tree
tree_plot <- xgb.plot.tree(model = xgb_model, trees = 3, plot_width = 1000, 
                           plot_height = 1000, render = FALSE)

# export plot object to file
export_graph(tree_plot, "xgboost_tree_plot.pdf", width = 1000, height = 1000)

roc_curve <- roc(y_test, pred_probs)

# Plot ROC curve
plot(roc_curve, main = "ROC Curve", col = "blue")

auc_value <- auc(roc_curve)
cat(sprintf("AUC: %f", auc_value))
Setting levels: control = 0, case = 1

Setting direction: controls < cases
AUC: 0.981471

Variable Importance for XGBoost

# Get variable importance
importance <- xgb.importance(model = xgb_model)

# Print the variable importance
print(importance)
                     Feature         Gain        Cover    Frequency
 1:             Paid_in_Taka 0.5612330288 0.2414881768 0.1472964574
 2:           Wage_Last_Head 0.0609914469 0.0679072500 0.1202610317
 3:      Monthly_Remittances 0.0529529219 0.1133333039 0.0733374767
 4:         Duration_of_stay 0.0495385629 0.1159481241 0.0978868863
 5: Livelihood_Occupation_17 0.0446480903 0.0746981992 0.0540708515
 6:       Age_First_Marriage 0.0415233500 0.0804492824 0.0876320696
 7:          Wage_First_Head 0.0350530518 0.0580184588 0.0798632691
 8:           Rent_per_Month 0.0304779481 0.0504952772 0.0649471722
 9:          Monthly_Savings 0.0177077918 0.0226183753 0.0385332505
10:              Food_budget 0.0164503521 0.0300356659 0.0413300186
11:             Number_Trips 0.0127972706 0.0209860825 0.0267246737
12:        Education_Level_8 0.0107921067 0.0229505020 0.0248601616
13:      Saving_brought_Home 0.0102594648 0.0157722135 0.0208203853
14:       Can_write_letter_2 0.0097427315 0.0184533893 0.0236171535
15:        Education_Level_2 0.0096760378 0.0061490748 0.0155376010
16:  Livelihood_Occupation_9 0.0078442205 0.0176617918 0.0180236172
17: Livelihood_Occupation_12 0.0058585761 0.0090102716 0.0105655687
18:        Education_Level_7 0.0053102806 0.0126347656 0.0146053449
19:  Livelihood_Occupation_2 0.0036721257 0.0040203959 0.0083903045
20: Livelihood_Occupation_13 0.0027524912 0.0014878333 0.0055935364
21:        Education_Level_4 0.0025741005 0.0058271179 0.0068365444
22:         Month_Arrival_98 0.0024378208 0.0027718135 0.0059042884
23: Livelihood_Occupation_11 0.0020747526 0.0023175921 0.0046612803
24:  Livelihood_Occupation_7 0.0013225912 0.0031107330 0.0034182722
25:          Month_Arrival_2 0.0012421704 0.0008629921 0.0027967682
26:        Education_Level_5 0.0006852165 0.0004356680 0.0015537601
27:        Education_Level_3 0.0003814974 0.0005556492 0.0009322561
                     Feature         Gain        Cover    Frequency